diff mbox series

[v3,03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct

Message ID 88114b5c725bb3300a9599d3eeebded221a0b1f9.1728491453.git.nicolinc@nvidia.com (mailing list archive)
State New, archived
Headers show
Series cover-letter: iommufd: Add vIOMMU infrastructure (Part-1) | expand

Commit Message

Nicolin Chen Oct. 9, 2024, 4:38 p.m. UTC
Add a new IOMMUFD_OBJ_VIOMMU with an iommufd_viommu structure to represent
a slice of physical IOMMU device passed to or shared with a user space VM.
This slice, now a vIOMMU object, is a group of virtualization resources of
a physical IOMMU's, such as:
 - Security namespace for guest owned ID, e.g. guest-controlled cache tags
 - Access to a sharable nesting parent pagetable across physical IOMMUs
 - Virtualization of various platforms IDs, e.g. RIDs and others
 - Delivery of paravirtualized invalidation
 - Direct assigned invalidation queues
 - Direct assigned interrupts
 - Non-affiliated event reporting

Add a new viommu_alloc op in iommu_ops, for drivers to allocate their own
vIOMMU structures. And this allocation also needs a free(), so add struct
iommufd_viommu_ops.

To simplify a vIOMMU allocation, provide a iommufd_viommu_alloc() helper.
It's suggested that a driver should embed a core-level viommu structure in
its driver-level viommu struct and call the iommufd_viommu_alloc() helper,
meanwhile the driver can also implement a viommu ops:
    struct my_driver_viommu {
        struct iommufd_viommu core;
        /* driver-owned properties/features */
        ....
    };

    static const struct iommufd_viommu_ops my_driver_viommu_ops = {
        .free = my_driver_viommu_free,
        /* future ops for virtualization features */
        ....
    };

    static struct iommufd_viommu my_driver_viommu_alloc(...)
    {
        struct my_driver_viommu *my_viommu =
                iommufd_viommu_alloc(ictx, my_driver_viommu, core,
                                     my_driver_viommu_ops);
        /* Init my_viommu and related HW feature */
        ....
        return &my_viommu->core;
    }

    static struct iommu_domain_ops my_driver_domain_ops = {
        ....
        .viommu_alloc = my_driver_viommu_alloc,
    };

To make the Kernel config work between a driver and the iommufd core, put
the for-driver allocation helpers into a new viommu_api file building with
CONFIG_IOMMUFD_DRIVER.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/iommufd/Makefile          |  2 +-
 drivers/iommu/iommufd/iommufd_private.h |  1 +
 include/linux/iommu.h                   | 14 ++++++
 include/linux/iommufd.h                 | 43 +++++++++++++++++++
 drivers/iommu/iommufd/main.c            | 32 --------------
 drivers/iommu/iommufd/viommu_api.c      | 57 +++++++++++++++++++++++++
 6 files changed, 116 insertions(+), 33 deletions(-)
 create mode 100644 drivers/iommu/iommufd/viommu_api.c

Comments

Zhangfei Gao Oct. 12, 2024, 3:23 a.m. UTC | #1
On Thu, 10 Oct 2024 at 00:40, Nicolin Chen <nicolinc@nvidia.com> wrote:
>
> Add a new IOMMUFD_OBJ_VIOMMU with an iommufd_viommu structure to represent
> a slice of physical IOMMU device passed to or shared with a user space VM.
> This slice, now a vIOMMU object, is a group of virtualization resources of
> a physical IOMMU's, such as:
>  - Security namespace for guest owned ID, e.g. guest-controlled cache tags
>  - Access to a sharable nesting parent pagetable across physical IOMMUs
>  - Virtualization of various platforms IDs, e.g. RIDs and others
>  - Delivery of paravirtualized invalidation
>  - Direct assigned invalidation queues
>  - Direct assigned interrupts
>  - Non-affiliated event reporting
>
> Add a new viommu_alloc op in iommu_ops, for drivers to allocate their own
> vIOMMU structures. And this allocation also needs a free(), so add struct
> iommufd_viommu_ops.
>
> To simplify a vIOMMU allocation, provide a iommufd_viommu_alloc() helper.
> It's suggested that a driver should embed a core-level viommu structure in
> its driver-level viommu struct and call the iommufd_viommu_alloc() helper,
> meanwhile the driver can also implement a viommu ops:
>     struct my_driver_viommu {
>         struct iommufd_viommu core;
>         /* driver-owned properties/features */
>         ....
>     };
>
>     static const struct iommufd_viommu_ops my_driver_viommu_ops = {
>         .free = my_driver_viommu_free,
>         /* future ops for virtualization features */
>         ....
>     };
>
>     static struct iommufd_viommu my_driver_viommu_alloc(...)
>     {
>         struct my_driver_viommu *my_viommu =
>                 iommufd_viommu_alloc(ictx, my_driver_viommu, core,
>                                      my_driver_viommu_ops);
>         /* Init my_viommu and related HW feature */
>         ....
>         return &my_viommu->core;
>     }
>
>     static struct iommu_domain_ops my_driver_domain_ops = {
>         ....
>         .viommu_alloc = my_driver_viommu_alloc,
>     };
>
> To make the Kernel config work between a driver and the iommufd core, put
> the for-driver allocation helpers into a new viommu_api file building with
> CONFIG_IOMMUFD_DRIVER.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>

> diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> new file mode 100644
> index 000000000000..c1731f080d6b
> --- /dev/null
> +++ b/drivers/iommu/iommufd/viommu_api.c
> @@ -0,0 +1,57 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
> + */
> +
> +#include "iommufd_private.h"
> +
> +struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
> +                                               size_t size,
> +                                               enum iommufd_object_type type)
> +{
> +       struct iommufd_object *obj;
> +       int rc;
> +
> +       obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
> +       if (!obj)
> +               return ERR_PTR(-ENOMEM);
> +       obj->type = type;
> +       /* Starts out bias'd by 1 until it is removed from the xarray */
> +       refcount_set(&obj->shortterm_users, 1);
> +       refcount_set(&obj->users, 1);

here set refcont 1

iommufd_device_bind -> iommufd_object_alloc(ictx, idev,
IOMMUFD_OBJ_DEVICE): refcont -> 1
refcount_inc(&idev->obj.users); refcount -> 2
will cause iommufd_device_unbind fail.

May remove refcount_inc(&idev->obj.users) in iommufd_device_bind

Thanks
Nicolin Chen Oct. 12, 2024, 4:49 a.m. UTC | #2
On Sat, Oct 12, 2024 at 11:23:07AM +0800, Zhangfei Gao wrote:
 
> > diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> > new file mode 100644
> > index 000000000000..c1731f080d6b
> > --- /dev/null
> > +++ b/drivers/iommu/iommufd/viommu_api.c
> > @@ -0,0 +1,57 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
> > + */
> > +
> > +#include "iommufd_private.h"
> > +
> > +struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
> > +                                               size_t size,
> > +                                               enum iommufd_object_type type)
> > +{
> > +       struct iommufd_object *obj;
> > +       int rc;
> > +
> > +       obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
> > +       if (!obj)
> > +               return ERR_PTR(-ENOMEM);
> > +       obj->type = type;
> > +       /* Starts out bias'd by 1 until it is removed from the xarray */
> > +       refcount_set(&obj->shortterm_users, 1);
> > +       refcount_set(&obj->users, 1);
> 
> here set refcont 1
> 
> iommufd_device_bind -> iommufd_object_alloc(ictx, idev,
> IOMMUFD_OBJ_DEVICE): refcont -> 1
> refcount_inc(&idev->obj.users); refcount -> 2
> will cause iommufd_device_unbind fail.
> 
> May remove refcount_inc(&idev->obj.users) in iommufd_device_bind

Hmm, why would it fail? Or is it failing on your system?

This patch doesn't change the function but only moved it..

Thanks
Nicolin
Zhangfei Gao Oct. 12, 2024, 10:18 a.m. UTC | #3
On Sat, 12 Oct 2024 at 12:49, Nicolin Chen <nicolinc@nvidia.com> wrote:
>
> On Sat, Oct 12, 2024 at 11:23:07AM +0800, Zhangfei Gao wrote:
>
> > > diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> > > new file mode 100644
> > > index 000000000000..c1731f080d6b
> > > --- /dev/null
> > > +++ b/drivers/iommu/iommufd/viommu_api.c
> > > @@ -0,0 +1,57 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
> > > + */
> > > +
> > > +#include "iommufd_private.h"
> > > +
> > > +struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
> > > +                                               size_t size,
> > > +                                               enum iommufd_object_type type)
> > > +{
> > > +       struct iommufd_object *obj;
> > > +       int rc;
> > > +
> > > +       obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
> > > +       if (!obj)
> > > +               return ERR_PTR(-ENOMEM);
> > > +       obj->type = type;
> > > +       /* Starts out bias'd by 1 until it is removed from the xarray */
> > > +       refcount_set(&obj->shortterm_users, 1);
> > > +       refcount_set(&obj->users, 1);
> >
> > here set refcont 1
> >
> > iommufd_device_bind -> iommufd_object_alloc(ictx, idev,
> > IOMMUFD_OBJ_DEVICE): refcont -> 1
> > refcount_inc(&idev->obj.users); refcount -> 2
> > will cause iommufd_device_unbind fail.
> >
> > May remove refcount_inc(&idev->obj.users) in iommufd_device_bind
>
> Hmm, why would it fail? Or is it failing on your system?

Not sure, still in check, it may only be on my platform.

it hit
iommufd_object_remove:
if (WARN_ON(obj != to_destroy))

iommufd_device_bind refcount=2
iommufd_device_attach refcount=3
//still not sure which operation inc the count?
iommufd_device_detach refcount=4

Thanks



>
> This patch doesn't change the function but only moved it..
>
> Thanks
> Nicolin
Zhangfei Gao Oct. 14, 2024, 7:58 a.m. UTC | #4
Hi, Nico

On Sat, 12 Oct 2024 at 18:18, Zhangfei Gao <zhangfei.gao@linaro.org> wrote:
>
> On Sat, 12 Oct 2024 at 12:49, Nicolin Chen <nicolinc@nvidia.com> wrote:
> >
> > On Sat, Oct 12, 2024 at 11:23:07AM +0800, Zhangfei Gao wrote:
> >
> > > > diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> > > > new file mode 100644
> > > > index 000000000000..c1731f080d6b
> > > > --- /dev/null
> > > > +++ b/drivers/iommu/iommufd/viommu_api.c
> > > > @@ -0,0 +1,57 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
> > > > + */
> > > > +
> > > > +#include "iommufd_private.h"
> > > > +
> > > > +struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
> > > > +                                               size_t size,
> > > > +                                               enum iommufd_object_type type)
> > > > +{
> > > > +       struct iommufd_object *obj;
> > > > +       int rc;
> > > > +
> > > > +       obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
> > > > +       if (!obj)
> > > > +               return ERR_PTR(-ENOMEM);
> > > > +       obj->type = type;
> > > > +       /* Starts out bias'd by 1 until it is removed from the xarray */
> > > > +       refcount_set(&obj->shortterm_users, 1);
> > > > +       refcount_set(&obj->users, 1);
> > >
> > > here set refcont 1
> > >
> > > iommufd_device_bind -> iommufd_object_alloc(ictx, idev,
> > > IOMMUFD_OBJ_DEVICE): refcont -> 1
> > > refcount_inc(&idev->obj.users); refcount -> 2
> > > will cause iommufd_device_unbind fail.
> > >
> > > May remove refcount_inc(&idev->obj.users) in iommufd_device_bind
> >
> > Hmm, why would it fail? Or is it failing on your system?
>
> Not sure, still in check, it may only be on my platform.
>
> it hit
> iommufd_object_remove:
> if (WARN_ON(obj != to_destroy))
>
> iommufd_device_bind refcount=2
> iommufd_device_attach refcount=3
> //still not sure which operation inc the count?
> iommufd_device_detach refcount=4
>

Have a question,
when should iommufd_vdevice_destroy be called, before or after
iommufd_device_unbind.

Now iommufd_vdevice_destroy (ref--) is after unbind, hits the if
(!refcount_dec_if_one(&obj->users)) check.

iommufd_device_bind
iommufd_device_attach
iommufd_vdevice_alloc_ioctl

iommufd_device_detach
iommufd_device_unbind // refcount check fail
iommufd_vdevice_destroy ref--

Thanks
Nicolin Chen Oct. 14, 2024, 3:46 p.m. UTC | #5
On Mon, Oct 14, 2024 at 03:58:55PM +0800, Zhangfei Gao wrote:

> > > > > +struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
> > > > > +                                               size_t size,
> > > > > +                                               enum iommufd_object_type type)
> > > > > +{
> > > > > +       struct iommufd_object *obj;
> > > > > +       int rc;
> > > > > +
> > > > > +       obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
> > > > > +       if (!obj)
> > > > > +               return ERR_PTR(-ENOMEM);
> > > > > +       obj->type = type;
> > > > > +       /* Starts out bias'd by 1 until it is removed from the xarray */
> > > > > +       refcount_set(&obj->shortterm_users, 1);
> > > > > +       refcount_set(&obj->users, 1);
> > > >
> > > > here set refcont 1
> > > >
> > > > iommufd_device_bind -> iommufd_object_alloc(ictx, idev,
> > > > IOMMUFD_OBJ_DEVICE): refcont -> 1
> > > > refcount_inc(&idev->obj.users); refcount -> 2
> > > > will cause iommufd_device_unbind fail.
> > > >
> > > > May remove refcount_inc(&idev->obj.users) in iommufd_device_bind
> > >
> > > Hmm, why would it fail? Or is it failing on your system?
> >
> > Not sure, still in check, it may only be on my platform.
> >
> > it hit
> > iommufd_object_remove:
> > if (WARN_ON(obj != to_destroy))
> >
> > iommufd_device_bind refcount=2
> > iommufd_device_attach refcount=3
> > //still not sure which operation inc the count?
> > iommufd_device_detach refcount=4
> >
> 
> Have a question,
> when should iommufd_vdevice_destroy be called, before or after
> iommufd_device_unbind.

Before.

> Now iommufd_vdevice_destroy (ref--) is after unbind, hits the if
> (!refcount_dec_if_one(&obj->users)) check.

Hmm, where do we have an iommufd_vdevice_destroy after unbind?

> iommufd_device_bind
> iommufd_device_attach
> iommufd_vdevice_alloc_ioctl
> 
> iommufd_device_detach
> iommufd_device_unbind // refcount check fail
> iommufd_vdevice_destroy ref--

Things should be symmetric. As you suspected, vdevice should be
destroyed before iommufd_device_detach.

A vdev is an object on top of a vIOMMU obj and an idev obj, so
it takes a refcount from each of them. That's why idev couldn't
unbind.

Thanks
Nicolin
Zhangfei Gao Oct. 15, 2024, 1:15 a.m. UTC | #6
On Mon, 14 Oct 2024 at 23:46, Nicolin Chen <nicolinc@nvidia.com> wrote:
>
> On Mon, Oct 14, 2024 at 03:58:55PM +0800, Zhangfei Gao wrote:
>
> > > > > > +struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
> > > > > > +                                               size_t size,
> > > > > > +                                               enum iommufd_object_type type)
> > > > > > +{
> > > > > > +       struct iommufd_object *obj;
> > > > > > +       int rc;
> > > > > > +
> > > > > > +       obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
> > > > > > +       if (!obj)
> > > > > > +               return ERR_PTR(-ENOMEM);
> > > > > > +       obj->type = type;
> > > > > > +       /* Starts out bias'd by 1 until it is removed from the xarray */
> > > > > > +       refcount_set(&obj->shortterm_users, 1);
> > > > > > +       refcount_set(&obj->users, 1);
> > > > >
> > > > > here set refcont 1
> > > > >
> > > > > iommufd_device_bind -> iommufd_object_alloc(ictx, idev,
> > > > > IOMMUFD_OBJ_DEVICE): refcont -> 1
> > > > > refcount_inc(&idev->obj.users); refcount -> 2
> > > > > will cause iommufd_device_unbind fail.
> > > > >
> > > > > May remove refcount_inc(&idev->obj.users) in iommufd_device_bind
> > > >
> > > > Hmm, why would it fail? Or is it failing on your system?
> > >
> > > Not sure, still in check, it may only be on my platform.
> > >
> > > it hit
> > > iommufd_object_remove:
> > > if (WARN_ON(obj != to_destroy))
> > >
> > > iommufd_device_bind refcount=2
> > > iommufd_device_attach refcount=3
> > > //still not sure which operation inc the count?
> > > iommufd_device_detach refcount=4
> > >
> >
> > Have a question,
> > when should iommufd_vdevice_destroy be called, before or after
> > iommufd_device_unbind.
>
> Before.
>
> > Now iommufd_vdevice_destroy (ref--) is after unbind, hits the if
> > (!refcount_dec_if_one(&obj->users)) check.
>
> Hmm, where do we have an iommufd_vdevice_destroy after unbind?

Looks like it is called by close fd?
[ 2480.909319]  iommufd_vdevice_destroy+0xdc/0x168
[ 2480.909324]  iommufd_fops_release+0xa4/0x140
[ 2480.909328]  __fput+0xd0/0x2e0
[ 2480.909331]  ____fput+0x1c/0x30
[ 2480.909333]  task_work_run+0x78/0xe0
[ 2480.909337]  do_exit+0x2fc/0xa50
[ 2480.909340]  do_group_exit+0x3c/0xa0
[ 2480.909344]  get_signal+0x96c/0x978
[ 2480.909346]  do_signal+0x94/0x3a8
[ 2480.909348]  do_notify_resume+0x100/0x190

>
> > iommufd_device_bind
> > iommufd_device_attach
> > iommufd_vdevice_alloc_ioctl
> >
> > iommufd_device_detach
> > iommufd_device_unbind // refcount check fail
> > iommufd_vdevice_destroy ref--
>
> Things should be symmetric. As you suspected, vdevice should be
> destroyed before iommufd_device_detach.

I am trying based on your for_iommufd_viommu_p2-v3 branch, do you have
this issue?
In checking whether close fd before unbind?

>
> A vdev is an object on top of a vIOMMU obj and an idev obj, so
> it takes a refcount from each of them. That's why idev couldn't
> unbind.

Thanks

>
> Thanks
> Nicolin
Nicolin Chen Oct. 15, 2024, 2:01 a.m. UTC | #7
On Tue, Oct 15, 2024 at 09:15:01AM +0800, Zhangfei Gao wrote:

> > > iommufd_device_bind
> > > iommufd_device_attach
> > > iommufd_vdevice_alloc_ioctl
> > >
> > > iommufd_device_detach
> > > iommufd_device_unbind // refcount check fail
> > > iommufd_vdevice_destroy ref--
> >
> > Things should be symmetric. As you suspected, vdevice should be
> > destroyed before iommufd_device_detach.
> 
> I am trying based on your for_iommufd_viommu_p2-v3 branch, do you have
> this issue?
> In checking whether close fd before unbind?

Oops, my bad. I will provide a fix.

Nicolin
Nicolin Chen Oct. 15, 2024, 6:44 p.m. UTC | #8
On Mon, Oct 14, 2024 at 07:01:40PM -0700, Nicolin Chen wrote:
> On Tue, Oct 15, 2024 at 09:15:01AM +0800, Zhangfei Gao wrote:
> 
> > > > iommufd_device_bind
> > > > iommufd_device_attach
> > > > iommufd_vdevice_alloc_ioctl
> > > >
> > > > iommufd_device_detach
> > > > iommufd_device_unbind // refcount check fail
> > > > iommufd_vdevice_destroy ref--
> > >
> > > Things should be symmetric. As you suspected, vdevice should be
> > > destroyed before iommufd_device_detach.
> > 
> > I am trying based on your for_iommufd_viommu_p2-v3 branch, do you have
> > this issue?
> > In checking whether close fd before unbind?
> 
> Oops, my bad. I will provide a fix.

This should fix the problem:

---------------------------------------------------------------------
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 5fd3dd420290..13100cfea29d 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -277,6 +277,11 @@ EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);
  */
 void iommufd_device_unbind(struct iommufd_device *idev)
 {
+	mutex_lock(&idev->igroup->lock);
+	/* idev->vdev object should be destroyed prior, yet just in case.. */
+	if (idev->vdev)
+		iommufd_object_remove(idev->ictx, NULL, idev->vdev->obj.id, 0);
+	mutex_unlock(&idev->igroup->lock);
 	iommufd_object_destroy_user(idev->ictx, &idev->obj);
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
---------------------------------------------------------------------

Thanks
Nicolin
Zhangfei Gao Oct. 16, 2024, 1:56 a.m. UTC | #9
On Wed, 16 Oct 2024 at 02:44, Nicolin Chen <nicolinc@nvidia.com> wrote:
>
> On Mon, Oct 14, 2024 at 07:01:40PM -0700, Nicolin Chen wrote:
> > On Tue, Oct 15, 2024 at 09:15:01AM +0800, Zhangfei Gao wrote:
> >
> > > > > iommufd_device_bind
> > > > > iommufd_device_attach
> > > > > iommufd_vdevice_alloc_ioctl
> > > > >
> > > > > iommufd_device_detach
> > > > > iommufd_device_unbind // refcount check fail
> > > > > iommufd_vdevice_destroy ref--
> > > >
> > > > Things should be symmetric. As you suspected, vdevice should be
> > > > destroyed before iommufd_device_detach.
> > >
> > > I am trying based on your for_iommufd_viommu_p2-v3 branch, do you have
> > > this issue?
> > > In checking whether close fd before unbind?
> >
> > Oops, my bad. I will provide a fix.
>
> This should fix the problem:
>
> ---------------------------------------------------------------------
> diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> index 5fd3dd420290..13100cfea29d 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -277,6 +277,11 @@ EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);
>   */
>  void iommufd_device_unbind(struct iommufd_device *idev)
>  {
> +       mutex_lock(&idev->igroup->lock);
> +       /* idev->vdev object should be destroyed prior, yet just in case.. */
> +       if (idev->vdev)
> +               iommufd_object_remove(idev->ictx, NULL, idev->vdev->obj.id, 0);
> +       mutex_unlock(&idev->igroup->lock);
>         iommufd_object_destroy_user(idev->ictx, &idev->obj);
>  }
>  EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
> ---------------------------------------------------------------------

Not yet
[  574.162112] Unable to handle kernel NULL pointer dereference at
virtual address 0000000000000004
[  574.261102] pc : iommufd_object_remove+0x7c/0x278
[  574.265795] lr : iommufd_device_unbind+0x44/0x98
in check

Thanks

>
> Thanks
> Nicolin
Nicolin Chen Oct. 16, 2024, 6:51 a.m. UTC | #10
On Wed, Oct 16, 2024 at 09:56:51AM +0800, Zhangfei Gao wrote:
> On Wed, 16 Oct 2024 at 02:44, Nicolin Chen <nicolinc@nvidia.com> wrote:
> >
> > On Mon, Oct 14, 2024 at 07:01:40PM -0700, Nicolin Chen wrote:
> > > On Tue, Oct 15, 2024 at 09:15:01AM +0800, Zhangfei Gao wrote:
> > >
> > > > > > iommufd_device_bind
> > > > > > iommufd_device_attach
> > > > > > iommufd_vdevice_alloc_ioctl
> > > > > >
> > > > > > iommufd_device_detach
> > > > > > iommufd_device_unbind // refcount check fail
> > > > > > iommufd_vdevice_destroy ref--
> > > > >
> > > > > Things should be symmetric. As you suspected, vdevice should be
> > > > > destroyed before iommufd_device_detach.
> > > >
> > > > I am trying based on your for_iommufd_viommu_p2-v3 branch, do you have
> > > > this issue?
> > > > In checking whether close fd before unbind?
> > >
> > > Oops, my bad. I will provide a fix.
> >
> > This should fix the problem:
> >
> > ---------------------------------------------------------------------
> > diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> > index 5fd3dd420290..13100cfea29d 100644
> > --- a/drivers/iommu/iommufd/device.c
> > +++ b/drivers/iommu/iommufd/device.c
> > @@ -277,6 +277,11 @@ EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);
> >   */
> >  void iommufd_device_unbind(struct iommufd_device *idev)
> >  {
> > +       mutex_lock(&idev->igroup->lock);
> > +       /* idev->vdev object should be destroyed prior, yet just in case.. */
> > +       if (idev->vdev)
> > +               iommufd_object_remove(idev->ictx, NULL, idev->vdev->obj.id, 0);
> > +       mutex_unlock(&idev->igroup->lock);
> >         iommufd_object_destroy_user(idev->ictx, &idev->obj);
> >  }
> >  EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
> > ---------------------------------------------------------------------
> 
> Not yet
> [  574.162112] Unable to handle kernel NULL pointer dereference at
> virtual address 0000000000000004
> [  574.261102] pc : iommufd_object_remove+0x7c/0x278
> [  574.265795] lr : iommufd_device_unbind+0x44/0x98
> in check

Hmm, it's kinda odd it crashes inside iommufd_object_remove().
Did you happen to change something there?

The added iommufd_object_remove() is equivalent to userspace
calling the destroy ioctl on the vDEVICE object.

Nicolin
Zhangfei Gao Oct. 16, 2024, 7:08 a.m. UTC | #11
On Wed, 16 Oct 2024 at 14:52, Nicolin Chen <nicolinc@nvidia.com> wrote:
>
> On Wed, Oct 16, 2024 at 09:56:51AM +0800, Zhangfei Gao wrote:
> > On Wed, 16 Oct 2024 at 02:44, Nicolin Chen <nicolinc@nvidia.com> wrote:
> > >
> > > On Mon, Oct 14, 2024 at 07:01:40PM -0700, Nicolin Chen wrote:
> > > > On Tue, Oct 15, 2024 at 09:15:01AM +0800, Zhangfei Gao wrote:
> > > >
> > > > > > > iommufd_device_bind
> > > > > > > iommufd_device_attach
> > > > > > > iommufd_vdevice_alloc_ioctl
> > > > > > >
> > > > > > > iommufd_device_detach
> > > > > > > iommufd_device_unbind // refcount check fail
> > > > > > > iommufd_vdevice_destroy ref--
> > > > > >
> > > > > > Things should be symmetric. As you suspected, vdevice should be
> > > > > > destroyed before iommufd_device_detach.
> > > > >
> > > > > I am trying based on your for_iommufd_viommu_p2-v3 branch, do you have
> > > > > this issue?
> > > > > In checking whether close fd before unbind?
> > > >
> > > > Oops, my bad. I will provide a fix.
> > >
> > > This should fix the problem:
> > >
> > > ---------------------------------------------------------------------
> > > diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> > > index 5fd3dd420290..13100cfea29d 100644
> > > --- a/drivers/iommu/iommufd/device.c
> > > +++ b/drivers/iommu/iommufd/device.c
> > > @@ -277,6 +277,11 @@ EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);
> > >   */
> > >  void iommufd_device_unbind(struct iommufd_device *idev)
> > >  {
> > > +       mutex_lock(&idev->igroup->lock);
> > > +       /* idev->vdev object should be destroyed prior, yet just in case.. */
> > > +       if (idev->vdev)
> > > +               iommufd_object_remove(idev->ictx, NULL, idev->vdev->obj.id, 0);
> > > +       mutex_unlock(&idev->igroup->lock);
> > >         iommufd_object_destroy_user(idev->ictx, &idev->obj);
> > >  }
> > >  EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
> > > ---------------------------------------------------------------------
> >
> > Not yet
> > [  574.162112] Unable to handle kernel NULL pointer dereference at
> > virtual address 0000000000000004
> > [  574.261102] pc : iommufd_object_remove+0x7c/0x278
> > [  574.265795] lr : iommufd_device_unbind+0x44/0x98
> > in check
>
> Hmm, it's kinda odd it crashes inside iommufd_object_remove().
> Did you happen to change something there?
>
> The added iommufd_object_remove() is equivalent to userspace
> calling the destroy ioctl on the vDEVICE object.
>
Yes, double confirmed, it can solve the issue.
The guest can stop and run again

The Null pointer may be caused by the added debug.

Thanks Nico.

> Nicolin
Jason Gunthorpe Oct. 17, 2024, 4:33 p.m. UTC | #12
On Wed, Oct 09, 2024 at 09:38:03AM -0700, Nicolin Chen wrote:
> +	struct iommufd_viommu *(*viommu_alloc)(struct iommu_device *iommu_dev,
> +					       struct iommu_domain *domain,

Let's call this parent_domain ie nesting parent

> +/*
> + * Helpers for IOMMU driver to allocate driver structures that will be freed by
> + * the iommufd core. Yet, a driver is responsible for its own
> struct cleanup.

'own struct cleanup via the free callback'

> diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> new file mode 100644
> index 000000000000..c1731f080d6b
> --- /dev/null
> +++ b/drivers/iommu/iommufd/viommu_api.c

Let's call this file driver.c to match CONFIG_IOMMUFD_DRIVER

> +struct iommufd_viommu *
> +__iommufd_viommu_alloc(struct iommufd_ctx *ictx, size_t size,
> +		       const struct iommufd_viommu_ops *ops)
> +{
> +	struct iommufd_viommu *viommu;
> +	struct iommufd_object *obj;
> +
> +	if (WARN_ON(size < sizeof(*viommu)))
> +		return ERR_PTR(-EINVAL);

The macro does this already

> +	obj = iommufd_object_alloc_elm(ictx, size, IOMMUFD_OBJ_VIOMMU);
> +	if (IS_ERR(obj))
> +		return ERR_CAST(obj);
> +	viommu = container_of(obj, struct iommufd_viommu, obj);

And this too..

> +	if (ops)
> +		viommu->ops = ops;

No need for an if...

It could just be in the macro which is a bit appealing given we want
this linked into the drivers..

/*
 * Helpers for IOMMU driver to allocate driver structures that will be freed by
 * the iommufd core. The free op will be called prior to freeing the memory.
 */
#define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops)            \
	({                                                                    \
		struct drv_struct *ret;                                       \
                                                                              \
		static_assert(                                                \
			__same_type(struct iommufd_viommu,                    \
				    ((struct drv_struct *)NULL)->member));    \
		static_assert(offsetof(struct drv_struct, member.obj) == 0);  \
		ret = (struct drv_struct *)iommufd_object_alloc_elm(          \
			ictx, sizeof(struct drv_struct), IOMMUFD_OBJ_VIOMMU); \
		ret->member.ops = viommu_ops;                                 \
		ret;                                                          \
	})

Jason
Nicolin Chen Oct. 17, 2024, 5:01 p.m. UTC | #13
On Thu, Oct 17, 2024 at 01:33:59PM -0300, Jason Gunthorpe wrote:

> > diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> > new file mode 100644
> > index 000000000000..c1731f080d6b
> > --- /dev/null
> > +++ b/drivers/iommu/iommufd/viommu_api.c
> 
> Let's call this file driver.c to match CONFIG_IOMMUFD_DRIVER

Would this make its scope too large that it might feel odd to have
the iova_bitmap.c in a separate file?

> /*
>  * Helpers for IOMMU driver to allocate driver structures that will be freed by
>  * the iommufd core. The free op will be called prior to freeing the memory.
>  */
> #define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops)            \
> 	({                                                                    \
> 		struct drv_struct *ret;                                       \
>                                                                               \
> 		static_assert(                                                \
> 			__same_type(struct iommufd_viommu,                    \
> 				    ((struct drv_struct *)NULL)->member));    \
> 		static_assert(offsetof(struct drv_struct, member.obj) == 0);  \
> 		ret = (struct drv_struct *)iommufd_object_alloc_elm(          \
> 			ictx, sizeof(struct drv_struct), IOMMUFD_OBJ_VIOMMU); \
> 		ret->member.ops = viommu_ops;                                 \
> 		ret;                                                          \
> 	})

OK. Will try with this.

Thanks
Nicolin
Jason Gunthorpe Oct. 17, 2024, 5:07 p.m. UTC | #14
On Thu, Oct 17, 2024 at 10:01:28AM -0700, Nicolin Chen wrote:
> On Thu, Oct 17, 2024 at 01:33:59PM -0300, Jason Gunthorpe wrote:
> 
> > > diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> > > new file mode 100644
> > > index 000000000000..c1731f080d6b
> > > --- /dev/null
> > > +++ b/drivers/iommu/iommufd/viommu_api.c
> > 
> > Let's call this file driver.c to match CONFIG_IOMMUFD_DRIVER
> 
> Would this make its scope too large that it might feel odd to have
> the iova_bitmap.c in a separate file?

Think of it as the catchall ?

Jason
diff mbox series

Patch

diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index cf4605962bea..93daedd7e5c8 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -12,4 +12,4 @@  iommufd-y := \
 iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o
 
 obj-$(CONFIG_IOMMUFD) += iommufd.o
-obj-$(CONFIG_IOMMUFD_DRIVER) += iova_bitmap.o
+obj-$(CONFIG_IOMMUFD_DRIVER) += iova_bitmap.o viommu_api.o
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index f2f3a906eac9..6a364073f699 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -131,6 +131,7 @@  enum iommufd_object_type {
 	IOMMUFD_OBJ_IOAS,
 	IOMMUFD_OBJ_ACCESS,
 	IOMMUFD_OBJ_FAULT,
+	IOMMUFD_OBJ_VIOMMU,
 #ifdef CONFIG_IOMMUFD_TEST
 	IOMMUFD_OBJ_SELFTEST,
 #endif
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c8d18f5f644e..3a50f57b0861 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -42,6 +42,8 @@  struct notifier_block;
 struct iommu_sva;
 struct iommu_dma_cookie;
 struct iommu_fault_param;
+struct iommufd_ctx;
+struct iommufd_viommu;
 
 #define IOMMU_FAULT_PERM_READ	(1 << 0) /* read */
 #define IOMMU_FAULT_PERM_WRITE	(1 << 1) /* write */
@@ -542,6 +544,13 @@  static inline int __iommu_copy_struct_from_user_array(
  * @remove_dev_pasid: Remove any translation configurations of a specific
  *                    pasid, so that any DMA transactions with this pasid
  *                    will be blocked by the hardware.
+ * @viommu_alloc: Allocate an iommufd_viommu on an @iommu_dev as the group of
+ *                virtualization resources shared/passed to user space IOMMU
+ *                instance. Associate it with a nesting parent @domain. The
+ *                @viommu_type must be defined in include/uapi/linux/iommufd.h
+ *                It is suggested to call iommufd_viommu_alloc() helper for
+ *                a bundled allocation of the core and the driver structures,
+ *                using the given @ictx pointer.
  * @pgsize_bitmap: bitmap of all possible supported page sizes
  * @owner: Driver module providing these ops
  * @identity_domain: An always available, always attachable identity
@@ -591,6 +600,11 @@  struct iommu_ops {
 	void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid,
 				 struct iommu_domain *domain);
 
+	struct iommufd_viommu *(*viommu_alloc)(struct iommu_device *iommu_dev,
+					       struct iommu_domain *domain,
+					       struct iommufd_ctx *ictx,
+					       unsigned int viommu_type);
+
 	const struct iommu_domain_ops *default_domain_ops;
 	unsigned long pgsize_bitmap;
 	struct module *owner;
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 6b9d46981870..069a38999cdd 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -17,6 +17,7 @@  struct iommu_group;
 struct iommufd_access;
 struct iommufd_ctx;
 struct iommufd_device;
+struct iommufd_viommu_ops;
 struct page;
 
 /* Base struct for all objects with a userspace ID handle. */
@@ -63,6 +64,26 @@  void iommufd_access_detach(struct iommufd_access *access);
 
 void iommufd_ctx_get(struct iommufd_ctx *ictx);
 
+struct iommufd_viommu {
+	struct iommufd_object obj;
+	struct iommufd_ctx *ictx;
+	struct iommu_device *iommu_dev;
+	struct iommufd_hwpt_paging *hwpt;
+
+	const struct iommufd_viommu_ops *ops;
+
+	unsigned int type;
+};
+
+/**
+ * struct iommufd_viommu_ops - vIOMMU specific operations
+ * @free: Free all driver-specific parts of an iommufd_viommu. The memory of the
+ *        vIOMMU will be free-ed by iommufd core after calling this free op.
+ */
+struct iommufd_viommu_ops {
+	void (*free)(struct iommufd_viommu *viommu);
+};
+
 #if IS_ENABLED(CONFIG_IOMMUFD)
 struct iommufd_ctx *iommufd_ctx_from_file(struct file *file);
 struct iommufd_ctx *iommufd_ctx_from_fd(int fd);
@@ -79,6 +100,9 @@  int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
 int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
 int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx);
 int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx);
+struct iommufd_viommu *
+__iommufd_viommu_alloc(struct iommufd_ctx *ictx, size_t size,
+		       const struct iommufd_viommu_ops *ops);
 #else /* !CONFIG_IOMMUFD */
 static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
 {
@@ -119,5 +143,24 @@  static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
 {
 	return -EOPNOTSUPP;
 }
+
+static inline struct iommufd_viommu *
+__iommufd_viommu_alloc(struct iommufd_ctx *ictx, size_t size,
+		       const struct iommufd_viommu_ops *ops)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
 #endif /* CONFIG_IOMMUFD */
+
+/*
+ * Helpers for IOMMU driver to allocate driver structures that will be freed by
+ * the iommufd core. Yet, a driver is responsible for its own struct cleanup.
+ */
+#define iommufd_viommu_alloc(ictx, drv_struct, member, ops)                    \
+	container_of(__iommufd_viommu_alloc(ictx,                              \
+					    sizeof(struct drv_struct) +        \
+					    BUILD_BUG_ON_ZERO(offsetof(        \
+						struct drv_struct, member)),   \
+					    ops),                              \
+		     struct drv_struct, member)
 #endif
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 28e1ef5666e9..92bd075108e5 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -29,38 +29,6 @@  struct iommufd_object_ops {
 static const struct iommufd_object_ops iommufd_object_ops[];
 static struct miscdevice vfio_misc_dev;
 
-struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
-						size_t size,
-						enum iommufd_object_type type)
-{
-	struct iommufd_object *obj;
-	int rc;
-
-	obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
-	if (!obj)
-		return ERR_PTR(-ENOMEM);
-	obj->type = type;
-	/* Starts out bias'd by 1 until it is removed from the xarray */
-	refcount_set(&obj->shortterm_users, 1);
-	refcount_set(&obj->users, 1);
-
-	/*
-	 * Reserve an ID in the xarray but do not publish the pointer yet since
-	 * the caller hasn't initialized it yet. Once the pointer is published
-	 * in the xarray and visible to other threads we can't reliably destroy
-	 * it anymore, so the caller must complete all errorable operations
-	 * before calling iommufd_object_finalize().
-	 */
-	rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY,
-		      xa_limit_31b, GFP_KERNEL_ACCOUNT);
-	if (rc)
-		goto out_free;
-	return obj;
-out_free:
-	kfree(obj);
-	return ERR_PTR(rc);
-}
-
 /*
  * Allow concurrent access to the object.
  *
diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
new file mode 100644
index 000000000000..c1731f080d6b
--- /dev/null
+++ b/drivers/iommu/iommufd/viommu_api.c
@@ -0,0 +1,57 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include "iommufd_private.h"
+
+struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
+						size_t size,
+						enum iommufd_object_type type)
+{
+	struct iommufd_object *obj;
+	int rc;
+
+	obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+	obj->type = type;
+	/* Starts out bias'd by 1 until it is removed from the xarray */
+	refcount_set(&obj->shortterm_users, 1);
+	refcount_set(&obj->users, 1);
+
+	/*
+	 * Reserve an ID in the xarray but do not publish the pointer yet since
+	 * the caller hasn't initialized it yet. Once the pointer is published
+	 * in the xarray and visible to other threads we can't reliably destroy
+	 * it anymore, so the caller must complete all errorable operations
+	 * before calling iommufd_object_finalize().
+	 */
+	rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY,
+		      xa_limit_31b, GFP_KERNEL_ACCOUNT);
+	if (rc)
+		goto out_free;
+	return obj;
+out_free:
+	kfree(obj);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_object_alloc_elm, IOMMUFD);
+
+struct iommufd_viommu *
+__iommufd_viommu_alloc(struct iommufd_ctx *ictx, size_t size,
+		       const struct iommufd_viommu_ops *ops)
+{
+	struct iommufd_viommu *viommu;
+	struct iommufd_object *obj;
+
+	if (WARN_ON(size < sizeof(*viommu)))
+		return ERR_PTR(-EINVAL);
+	obj = iommufd_object_alloc_elm(ictx, size, IOMMUFD_OBJ_VIOMMU);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+	viommu = container_of(obj, struct iommufd_viommu, obj);
+	if (ops)
+		viommu->ops = ops;
+	return viommu;
+}
+EXPORT_SYMBOL_NS_GPL(__iommufd_viommu_alloc, IOMMUFD);