diff mbox series

[RFC,v6,09/24] vfio: Force nested if iommu requires it

Message ID 20200320165840.30057-10-eric.auger@redhat.com (mailing list archive)
State New, archived
Headers show
Series vSMMUv3/pSMMUv3 2 stage VFIO integration | expand

Commit Message

Eric Auger March 20, 2020, 4:58 p.m. UTC
In case we detect the address space is translated by
a virtual IOMMU which requires HW nested paging to
integrate with VFIO, let's set up the container with
the VFIO_TYPE1_NESTING_IOMMU iommu_type.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---

v4 -> v5:
- fail immediatly if nested is wanted but not supported

v2 -> v3:
- add "nested only is selected if requested by @force_nested"
  comment in this patch
---
 hw/vfio/common.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

Comments

Yi Liu March 31, 2020, 6:34 a.m. UTC | #1
Hi Eric,

> From: Eric Auger <eric.auger@redhat.com>
> Sent: Saturday, March 21, 2020 12:58 AM
> To: eric.auger.pro@gmail.com; eric.auger@redhat.com; qemu-devel@nongnu.org;
> Subject: [RFC v6 09/24] vfio: Force nested if iommu requires it
> 
> In case we detect the address space is translated by
> a virtual IOMMU which requires HW nested paging to
> integrate with VFIO, let's set up the container with
> the VFIO_TYPE1_NESTING_IOMMU iommu_type.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> ---
> 
> v4 -> v5:
> - fail immediatly if nested is wanted but not supported
> 
> v2 -> v3:
> - add "nested only is selected if requested by @force_nested"
>   comment in this patch
> ---
>  hw/vfio/common.c | 36 ++++++++++++++++++++++++++++--------
>  1 file changed, 28 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 0b3593b3c0..ac417b5dbd 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -1155,27 +1155,38 @@ static void vfio_put_address_space(VFIOAddressSpace
> *space)
>   * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
>   */
>  static int vfio_get_iommu_type(VFIOContainer *container,
> +                               bool want_nested,
>                                 Error **errp)
>  {
> -    int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
> +    int iommu_types[] = { VFIO_TYPE1_NESTING_IOMMU,
> +                          VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
>                            VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
> -    int i;
> +    int i, ret = -EINVAL;
> 
>      for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
>          if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
> -            return iommu_types[i];
> +            if (iommu_types[i] == VFIO_TYPE1_NESTING_IOMMU && !want_nested) {
> +                continue;
> +            }
> +            ret = iommu_types[i];
> +            break;
>          }
>      }
> -    error_setg(errp, "No available IOMMU models");
> -    return -EINVAL;
> +    if (ret < 0) {
> +        error_setg(errp, "No available IOMMU models");
> +    } else if (want_nested && ret != VFIO_TYPE1_NESTING_IOMMU) {
> +        error_setg(errp, "Nested mode requested but not supported");
> +        ret = -EINVAL;
> +    }
> +    return ret;
>  }
> 
>  static int vfio_init_container(VFIOContainer *container, int group_fd,
> -                               Error **errp)
> +                               bool want_nested, Error **errp)
>  {
>      int iommu_type, ret;
> 
> -    iommu_type = vfio_get_iommu_type(container, errp);
> +    iommu_type = vfio_get_iommu_type(container, want_nested, errp);
>      if (iommu_type < 0) {
>          return iommu_type;
>      }
> @@ -1211,6 +1222,14 @@ static int vfio_connect_container(VFIOGroup *group,
> AddressSpace *as,
>      VFIOContainer *container;
>      int ret, fd;
>      VFIOAddressSpace *space;
> +    IOMMUMemoryRegion *iommu_mr;
> +    bool nested = false;
> +
> +    if (as != &address_space_memory && memory_region_is_iommu(as->root)) {

I tried on my side. For virtual VT-d, it doesn't work as in intel_iommu,
we have a dynamic switch mechanism. Thus that, the
memory_region_is_iommu(as->root) won't return true as expected. I'm afraid
it doesn't work for virtual VT-d.  So firstly, I'm wondering if
as != &address_space_memory is enough. Secondly, I'm considering if it is
good to let vfio_get_group() caller to provide a hint whether vIOMMU is
exposed. e.g. vfio_realize() in vfio/pci.c could figure out whether vIOMMU
is set easily. Thoughts?

Regards,
Yi Liu
Eric Auger March 31, 2020, 8:04 a.m. UTC | #2
Yi,

On 3/31/20 8:34 AM, Liu, Yi L wrote:
> Hi Eric,
> 
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: Saturday, March 21, 2020 12:58 AM
>> To: eric.auger.pro@gmail.com; eric.auger@redhat.com; qemu-devel@nongnu.org;
>> Subject: [RFC v6 09/24] vfio: Force nested if iommu requires it
>>
>> In case we detect the address space is translated by
>> a virtual IOMMU which requires HW nested paging to
>> integrate with VFIO, let's set up the container with
>> the VFIO_TYPE1_NESTING_IOMMU iommu_type.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>>
>> v4 -> v5:
>> - fail immediatly if nested is wanted but not supported
>>
>> v2 -> v3:
>> - add "nested only is selected if requested by @force_nested"
>>   comment in this patch
>> ---
>>  hw/vfio/common.c | 36 ++++++++++++++++++++++++++++--------
>>  1 file changed, 28 insertions(+), 8 deletions(-)
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 0b3593b3c0..ac417b5dbd 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -1155,27 +1155,38 @@ static void vfio_put_address_space(VFIOAddressSpace
>> *space)
>>   * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
>>   */
>>  static int vfio_get_iommu_type(VFIOContainer *container,
>> +                               bool want_nested,
>>                                 Error **errp)
>>  {
>> -    int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
>> +    int iommu_types[] = { VFIO_TYPE1_NESTING_IOMMU,
>> +                          VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
>>                            VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
>> -    int i;
>> +    int i, ret = -EINVAL;
>>
>>      for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
>>          if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
>> -            return iommu_types[i];
>> +            if (iommu_types[i] == VFIO_TYPE1_NESTING_IOMMU && !want_nested) {
>> +                continue;
>> +            }
>> +            ret = iommu_types[i];
>> +            break;
>>          }
>>      }
>> -    error_setg(errp, "No available IOMMU models");
>> -    return -EINVAL;
>> +    if (ret < 0) {
>> +        error_setg(errp, "No available IOMMU models");
>> +    } else if (want_nested && ret != VFIO_TYPE1_NESTING_IOMMU) {
>> +        error_setg(errp, "Nested mode requested but not supported");
>> +        ret = -EINVAL;
>> +    }
>> +    return ret;
>>  }
>>
>>  static int vfio_init_container(VFIOContainer *container, int group_fd,
>> -                               Error **errp)
>> +                               bool want_nested, Error **errp)
>>  {
>>      int iommu_type, ret;
>>
>> -    iommu_type = vfio_get_iommu_type(container, errp);
>> +    iommu_type = vfio_get_iommu_type(container, want_nested, errp);
>>      if (iommu_type < 0) {
>>          return iommu_type;
>>      }
>> @@ -1211,6 +1222,14 @@ static int vfio_connect_container(VFIOGroup *group,
>> AddressSpace *as,
>>      VFIOContainer *container;
>>      int ret, fd;
>>      VFIOAddressSpace *space;
>> +    IOMMUMemoryRegion *iommu_mr;
>> +    bool nested = false;
>> +
>> +    if (as != &address_space_memory && memory_region_is_iommu(as->root)) {
> 
> I tried on my side. For virtual VT-d, it doesn't work as in intel_iommu,
> we have a dynamic switch mechanism. Thus that, the
> memory_region_is_iommu(as->root) won't return true as expected. I'm afraid
> it doesn't work for virtual VT-d.  So firstly, I'm wondering if
> as != &address_space_memory is enough.

(as != &address_space_memory) should be sufficient to tell that a vIOMMU
is being used. But then, for example, you don't want to set nested
paging for the virtio-iommu because virtio-iommu/VFIO uses notify-on-my
(CM similar implementation). That's why I devised an attribute to
retrieve the vIOMMU need for nested.

 Secondly, I'm considering if it is
> good to let vfio_get_group() caller to provide a hint whether vIOMMU is
> exposed. e.g. vfio_realize() in vfio/pci.c could figure out whether vIOMMU
> is set easily. Thoughts?
Sorry I don't get your point here. Why is it easier to figure out
whether vIOMMU is set in vfio_realize()?

pci_device_iommu_address_space(pdev) !=  &address_space_memory
does determine whether a vIOMMU is in place, no?

Thanks

Eric
> 
> Regards,
> Yi Liu
>
Yi Liu March 31, 2020, 8:34 a.m. UTC | #3
> From: Auger Eric <eric.auger@redhat.com>
> Sent: Tuesday, March 31, 2020 4:05 PM
> To: Liu, Yi L <yi.l.liu@intel.com>; eric.auger.pro@gmail.com; qemu-
> Subject: Re: [RFC v6 09/24] vfio: Force nested if iommu requires it
> 
> Yi,
> 
> On 3/31/20 8:34 AM, Liu, Yi L wrote:
> > Hi Eric,
> >
> >> From: Eric Auger <eric.auger@redhat.com>
> >> Sent: Saturday, March 21, 2020 12:58 AM
> >> To: eric.auger.pro@gmail.com; eric.auger@redhat.com; qemu-
> devel@nongnu.org;
> >> Subject: [RFC v6 09/24] vfio: Force nested if iommu requires it
> >>
> >> In case we detect the address space is translated by
> >> a virtual IOMMU which requires HW nested paging to
> >> integrate with VFIO, let's set up the container with
> >> the VFIO_TYPE1_NESTING_IOMMU iommu_type.
> >>
> >> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> >>
> >> ---
> >>
> >> v4 -> v5:
> >> - fail immediatly if nested is wanted but not supported
> >>
> >> v2 -> v3:
> >> - add "nested only is selected if requested by @force_nested"
> >>   comment in this patch
> >> ---
> >>  hw/vfio/common.c | 36 ++++++++++++++++++++++++++++--------
> >>  1 file changed, 28 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> >> index 0b3593b3c0..ac417b5dbd 100644
> >> --- a/hw/vfio/common.c
> >> +++ b/hw/vfio/common.c
> >> @@ -1155,27 +1155,38 @@ static void
> vfio_put_address_space(VFIOAddressSpace
> >> *space)
> >>   * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
> >>   */
> >>  static int vfio_get_iommu_type(VFIOContainer *container,
> >> +                               bool want_nested,
> >>                                 Error **errp)
> >>  {
> >> -    int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
> >> +    int iommu_types[] = { VFIO_TYPE1_NESTING_IOMMU,
> >> +                          VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
> >>                            VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
> >> -    int i;
> >> +    int i, ret = -EINVAL;
> >>
> >>      for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
> >>          if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
> >> -            return iommu_types[i];
> >> +            if (iommu_types[i] == VFIO_TYPE1_NESTING_IOMMU && !want_nested)
> {
> >> +                continue;
> >> +            }
> >> +            ret = iommu_types[i];
> >> +            break;
> >>          }
> >>      }
> >> -    error_setg(errp, "No available IOMMU models");
> >> -    return -EINVAL;
> >> +    if (ret < 0) {
> >> +        error_setg(errp, "No available IOMMU models");
> >> +    } else if (want_nested && ret != VFIO_TYPE1_NESTING_IOMMU) {
> >> +        error_setg(errp, "Nested mode requested but not supported");
> >> +        ret = -EINVAL;
> >> +    }
> >> +    return ret;
> >>  }
> >>
> >>  static int vfio_init_container(VFIOContainer *container, int group_fd,
> >> -                               Error **errp)
> >> +                               bool want_nested, Error **errp)
> >>  {
> >>      int iommu_type, ret;
> >>
> >> -    iommu_type = vfio_get_iommu_type(container, errp);
> >> +    iommu_type = vfio_get_iommu_type(container, want_nested, errp);
> >>      if (iommu_type < 0) {
> >>          return iommu_type;
> >>      }
> >> @@ -1211,6 +1222,14 @@ static int vfio_connect_container(VFIOGroup *group,
> >> AddressSpace *as,
> >>      VFIOContainer *container;
> >>      int ret, fd;
> >>      VFIOAddressSpace *space;
> >> +    IOMMUMemoryRegion *iommu_mr;
> >> +    bool nested = false;
> >> +
> >> +    if (as != &address_space_memory && memory_region_is_iommu(as->root))
> {
> >
> > I tried on my side. For virtual VT-d, it doesn't work as in intel_iommu,
> > we have a dynamic switch mechanism. Thus that, the
> > memory_region_is_iommu(as->root) won't return true as expected. I'm afraid
> > it doesn't work for virtual VT-d.  So firstly, I'm wondering if
> > as != &address_space_memory is enough.
> 
> (as != &address_space_memory) should be sufficient to tell that a vIOMMU
> is being used. But then, for example, you don't want to set nested
> paging for the virtio-iommu because virtio-iommu/VFIO uses notify-on-my
> (CM similar implementation). That's why I devised an attribute to
> retrieve the vIOMMU need for nested.
> 
>  Secondly, I'm considering if it is
> > good to let vfio_get_group() caller to provide a hint whether vIOMMU is
> > exposed. e.g. vfio_realize() in vfio/pci.c could figure out whether vIOMMU
> > is set easily. Thoughts?
> Sorry I don't get your point here. Why is it easier to figure out
> whether vIOMMU is set in vfio_realize()?
> 
> pci_device_iommu_address_space(pdev) !=  &address_space_memory
> does determine whether a vIOMMU is in place, no?
> 
No it's not just pci_device_iommu_address_space(pdev) !=  &address_space_memory,
I agree with your above comment, it's not enough to tell whether nesting is
needed or not. I'd like to add an API like pci_device_iommu_nesting_required(),
so that it can be determined. In the meanwhile, adding a query callback in
PCIIOMMUOps introduced in below pathc. Guess it works?

[v2,05/22] hw/pci: modify pci_setup_iommu() to set PCIIOMMUOps
https://patchwork.kernel.org/patch/11464577/

Regards,
Yi Liu
diff mbox series

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 0b3593b3c0..ac417b5dbd 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1155,27 +1155,38 @@  static void vfio_put_address_space(VFIOAddressSpace *space)
  * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
  */
 static int vfio_get_iommu_type(VFIOContainer *container,
+                               bool want_nested,
                                Error **errp)
 {
-    int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
+    int iommu_types[] = { VFIO_TYPE1_NESTING_IOMMU,
+                          VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
                           VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
-    int i;
+    int i, ret = -EINVAL;
 
     for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
         if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
-            return iommu_types[i];
+            if (iommu_types[i] == VFIO_TYPE1_NESTING_IOMMU && !want_nested) {
+                continue;
+            }
+            ret = iommu_types[i];
+            break;
         }
     }
-    error_setg(errp, "No available IOMMU models");
-    return -EINVAL;
+    if (ret < 0) {
+        error_setg(errp, "No available IOMMU models");
+    } else if (want_nested && ret != VFIO_TYPE1_NESTING_IOMMU) {
+        error_setg(errp, "Nested mode requested but not supported");
+        ret = -EINVAL;
+    }
+    return ret;
 }
 
 static int vfio_init_container(VFIOContainer *container, int group_fd,
-                               Error **errp)
+                               bool want_nested, Error **errp)
 {
     int iommu_type, ret;
 
-    iommu_type = vfio_get_iommu_type(container, errp);
+    iommu_type = vfio_get_iommu_type(container, want_nested, errp);
     if (iommu_type < 0) {
         return iommu_type;
     }
@@ -1211,6 +1222,14 @@  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
     VFIOContainer *container;
     int ret, fd;
     VFIOAddressSpace *space;
+    IOMMUMemoryRegion *iommu_mr;
+    bool nested = false;
+
+    if (as != &address_space_memory && memory_region_is_iommu(as->root)) {
+        iommu_mr = IOMMU_MEMORY_REGION(as->root);
+        memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_VFIO_NESTED,
+                                     (void *)&nested);
+    }
 
     space = vfio_get_address_space(as);
 
@@ -1272,12 +1291,13 @@  static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
     QLIST_INIT(&container->giommu_list);
     QLIST_INIT(&container->hostwin_list);
 
-    ret = vfio_init_container(container, group->fd, errp);
+    ret = vfio_init_container(container, group->fd, nested, errp);
     if (ret) {
         goto free_container_exit;
     }
 
     switch (container->iommu_type) {
+    case VFIO_TYPE1_NESTING_IOMMU:
     case VFIO_TYPE1v2_IOMMU:
     case VFIO_TYPE1_IOMMU:
     {