diff mbox series

vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

Message ID 20230128031740.166743-1-sunnanyong@huawei.com (mailing list archive)
State New, archived
Headers show
Series vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI | expand

Commit Message

Nanyong Sun Jan. 28, 2023, 3:17 a.m. UTC
From: Rong Wang <wangrong68@huawei.com>

Once an IOMMU domain is enabled for a device, the MSI
translation tables have to be in place for software-managed MSI.
Otherwise, a platform that uses software-managed MSI and has no
irq bypass function cannot get a correct memory write event
from PCIe, and will therefore not receive any irqs.
The solution is to obtain the MSI physical base address from
the IOMMU reserved regions and set it as the IOMMU MSI cookie;
the translation tables will then be created when the irq is requested.

Signed-off-by: Rong Wang <wangrong68@huawei.com>
Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
---
 drivers/iommu/iommu.c |  1 +
 drivers/vhost/vdpa.c  | 53 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 51 insertions(+), 3 deletions(-)

Comments

Jason Wang Jan. 29, 2023, 6:02 a.m. UTC | #1
On Sat, Jan 28, 2023 at 10:25 AM Nanyong Sun <sunnanyong@huawei.com> wrote:
>
> From: Rong Wang <wangrong68@huawei.com>
>
> Once enable iommu domain for one device, the MSI
> translation tables have to be there for software-managed MSI.
> Otherwise, platform with software-managed MSI without an
> irq bypass function, can not get a correct memory write event
> from pcie, will not get irqs.
> The solution is to obtain the MSI phy base address from
> iommu reserved region, and set it to iommu MSI cookie,
> then translation tables will be created while request irq.
>
> Signed-off-by: Rong Wang <wangrong68@huawei.com>
> Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
> ---
>  drivers/iommu/iommu.c |  1 +
>  drivers/vhost/vdpa.c  | 53 ++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 51 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index de91dd88705b..f6c65d5d8e2b 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -2623,6 +2623,7 @@ void iommu_get_resv_regions(struct device *dev, struct list_head *list)
>         if (ops->get_resv_regions)
>                 ops->get_resv_regions(dev, list);
>  }
> +EXPORT_SYMBOL_GPL(iommu_get_resv_regions);
>
>  /**
>   * iommu_put_resv_regions - release resered regions
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index ec32f785dfde..31d3e9ed4cfa 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -1103,6 +1103,48 @@ static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
>         return vhost_chr_write_iter(dev, from);
>  }
>
> +static bool vhost_vdpa_check_sw_msi(struct list_head *dev_resv_regions, phys_addr_t *base)
> +{
> +       struct iommu_resv_region *region;
> +       bool ret = false;
> +
> +       list_for_each_entry(region, dev_resv_regions, list) {
> +               /*
> +                * The presence of any 'real' MSI regions should take
> +                * precedence over the software-managed one if the
> +                * IOMMU driver happens to advertise both types.
> +                */
> +               if (region->type == IOMMU_RESV_MSI) {
> +                       ret = false;
> +                       break;
> +               }
> +
> +               if (region->type == IOMMU_RESV_SW_MSI) {
> +                       *base = region->start;
> +                       ret = true;
> +               }
> +       }
> +
> +       return ret;
> +}

Can we unify this with what VFIO has?

> +
> +static int vhost_vdpa_get_msi_cookie(struct iommu_domain *domain, struct device *dma_dev)
> +{
> +       struct list_head dev_resv_regions;
> +       phys_addr_t resv_msi_base = 0;
> +       int ret = 0;
> +
> +       INIT_LIST_HEAD(&dev_resv_regions);
> +       iommu_get_resv_regions(dma_dev, &dev_resv_regions);
> +
> +       if (vhost_vdpa_check_sw_msi(&dev_resv_regions, &resv_msi_base))
> +               ret = iommu_get_msi_cookie(domain, resv_msi_base);
> +
> +       iommu_put_resv_regions(dma_dev, &dev_resv_regions);
> +
> +       return ret;
> +}
> +
>  static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>  {
>         struct vdpa_device *vdpa = v->vdpa;
> @@ -1128,11 +1170,16 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>
>         ret = iommu_attach_device(v->domain, dma_dev);
>         if (ret)
> -               goto err_attach;
> +               goto err_alloc_domain;
>
> -       return 0;
> +       ret = vhost_vdpa_get_msi_cookie(v->domain, dma_dev);

Do we need to check for overlapping mappings and record the region in
the interval tree (as VFIO does)?

Thanks

> +       if (ret)
> +               goto err_attach_device;
>
> -err_attach:
> +       return 0;
> +err_attach_device:
> +       iommu_detach_device(v->domain, dma_dev);
> +err_alloc_domain:
>         iommu_domain_free(v->domain);
>         return ret;
>  }
> --
> 2.25.1
>
Nanyong Sun Jan. 31, 2023, 1:32 a.m. UTC | #2
On 2023/1/29 14:02, Jason Wang wrote:
> On Sat, Jan 28, 2023 at 10:25 AM Nanyong Sun <sunnanyong@huawei.com> wrote:
>> From: Rong Wang <wangrong68@huawei.com>
>>
>> Once enable iommu domain for one device, the MSI
>> translation tables have to be there for software-managed MSI.
>> Otherwise, platform with software-managed MSI without an
>> irq bypass function, can not get a correct memory write event
>> from pcie, will not get irqs.
>> The solution is to obtain the MSI phy base address from
>> iommu reserved region, and set it to iommu MSI cookie,
>> then translation tables will be created while request irq.
>>
>> Signed-off-by: Rong Wang <wangrong68@huawei.com>
>> Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
>> ---
>>   drivers/iommu/iommu.c |  1 +
>>   drivers/vhost/vdpa.c  | 53 ++++++++++++++++++++++++++++++++++++++++---
>>   2 files changed, 51 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index de91dd88705b..f6c65d5d8e2b 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -2623,6 +2623,7 @@ void iommu_get_resv_regions(struct device *dev, struct list_head *list)
>>          if (ops->get_resv_regions)
>>                  ops->get_resv_regions(dev, list);
>>   }
>> +EXPORT_SYMBOL_GPL(iommu_get_resv_regions);
>>
>>   /**
>>    * iommu_put_resv_regions - release resered regions
>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>> index ec32f785dfde..31d3e9ed4cfa 100644
>> --- a/drivers/vhost/vdpa.c
>> +++ b/drivers/vhost/vdpa.c
>> @@ -1103,6 +1103,48 @@ static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
>>          return vhost_chr_write_iter(dev, from);
>>   }
>>
>> +static bool vhost_vdpa_check_sw_msi(struct list_head *dev_resv_regions, phys_addr_t *base)
>> +{
>> +       struct iommu_resv_region *region;
>> +       bool ret = false;
>> +
>> +       list_for_each_entry(region, dev_resv_regions, list) {
>> +               /*
>> +                * The presence of any 'real' MSI regions should take
>> +                * precedence over the software-managed one if the
>> +                * IOMMU driver happens to advertise both types.
>> +                */
>> +               if (region->type == IOMMU_RESV_MSI) {
>> +                       ret = false;
>> +                       break;
>> +               }
>> +
>> +               if (region->type == IOMMU_RESV_SW_MSI) {
>> +                       *base = region->start;
>> +                       ret = true;
>> +               }
>> +       }
>> +
>> +       return ret;
>> +}
> Can we unify this with what VFIO had?
Yes, these two functions are exactly the same.
Do you think moving this function to iommu.c and exporting it from the
IOMMU core would be a good choice?
>
>> +
>> +static int vhost_vdpa_get_msi_cookie(struct iommu_domain *domain, struct device *dma_dev)
>> +{
>> +       struct list_head dev_resv_regions;
>> +       phys_addr_t resv_msi_base = 0;
>> +       int ret = 0;
>> +
>> +       INIT_LIST_HEAD(&dev_resv_regions);
>> +       iommu_get_resv_regions(dma_dev, &dev_resv_regions);
>> +
>> +       if (vhost_vdpa_check_sw_msi(&dev_resv_regions, &resv_msi_base))
>> +               ret = iommu_get_msi_cookie(domain, resv_msi_base);
>> +
>> +       iommu_put_resv_regions(dma_dev, &dev_resv_regions);
>> +
>> +       return ret;
>> +}
>> +
>>   static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>>   {
>>          struct vdpa_device *vdpa = v->vdpa;
>> @@ -1128,11 +1170,16 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>>
>>          ret = iommu_attach_device(v->domain, dma_dev);
>>          if (ret)
>> -               goto err_attach;
>> +               goto err_alloc_domain;
>>
>> -       return 0;
>> +       ret = vhost_vdpa_get_msi_cookie(v->domain, dma_dev);
> Do we need to check the overlap mapping and record it in the interval
> tree (as what VFIO did)?
>
> Thanks
Yes, we need to take care of this part; I will handle it soon.
Thanks a lot.
>> +       if (ret)
>> +               goto err_attach_device;
>>
>> -err_attach:
>> +       return 0;
>> +err_attach_device:
>> +       iommu_detach_device(v->domain, dma_dev);
>> +err_alloc_domain:
>>          iommu_domain_free(v->domain);
>>          return ret;
>>   }
>> --
>> 2.25.1
>>
> .
Jason Wang Jan. 31, 2023, 3:06 a.m. UTC | #3
On Tue, Jan 31, 2023 at 9:32 AM Nanyong Sun <sunnanyong@huawei.com> wrote:
>
> On 2023/1/29 14:02, Jason Wang wrote:
> > On Sat, Jan 28, 2023 at 10:25 AM Nanyong Sun <sunnanyong@huawei.com> wrote:
> >> From: Rong Wang <wangrong68@huawei.com>
> >>
> >> Once enable iommu domain for one device, the MSI
> >> translation tables have to be there for software-managed MSI.
> >> Otherwise, platform with software-managed MSI without an
> >> irq bypass function, can not get a correct memory write event
> >> from pcie, will not get irqs.
> >> The solution is to obtain the MSI phy base address from
> >> iommu reserved region, and set it to iommu MSI cookie,
> >> then translation tables will be created while request irq.
> >>
> >> Signed-off-by: Rong Wang <wangrong68@huawei.com>
> >> Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
> >> ---
> >>   drivers/iommu/iommu.c |  1 +
> >>   drivers/vhost/vdpa.c  | 53 ++++++++++++++++++++++++++++++++++++++++---
> >>   2 files changed, 51 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> >> index de91dd88705b..f6c65d5d8e2b 100644
> >> --- a/drivers/iommu/iommu.c
> >> +++ b/drivers/iommu/iommu.c
> >> @@ -2623,6 +2623,7 @@ void iommu_get_resv_regions(struct device *dev, struct list_head *list)
> >>          if (ops->get_resv_regions)
> >>                  ops->get_resv_regions(dev, list);
> >>   }
> >> +EXPORT_SYMBOL_GPL(iommu_get_resv_regions);
> >>
> >>   /**
> >>    * iommu_put_resv_regions - release resered regions
> >> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> >> index ec32f785dfde..31d3e9ed4cfa 100644
> >> --- a/drivers/vhost/vdpa.c
> >> +++ b/drivers/vhost/vdpa.c
> >> @@ -1103,6 +1103,48 @@ static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
> >>          return vhost_chr_write_iter(dev, from);
> >>   }
> >>
> >> +static bool vhost_vdpa_check_sw_msi(struct list_head *dev_resv_regions, phys_addr_t *base)
> >> +{
> >> +       struct iommu_resv_region *region;
> >> +       bool ret = false;
> >> +
> >> +       list_for_each_entry(region, dev_resv_regions, list) {
> >> +               /*
> >> +                * The presence of any 'real' MSI regions should take
> >> +                * precedence over the software-managed one if the
> >> +                * IOMMU driver happens to advertise both types.
> >> +                */
> >> +               if (region->type == IOMMU_RESV_MSI) {
> >> +                       ret = false;
> >> +                       break;
> >> +               }
> >> +
> >> +               if (region->type == IOMMU_RESV_SW_MSI) {
> >> +                       *base = region->start;
> >> +                       ret = true;
> >> +               }
> >> +       }
> >> +
> >> +       return ret;
> >> +}
> > Can we unify this with what VFIO had?
> Yes, these two functions are just the same.
> Do you think move this function to iommu.c, and export from iommu is a
> good choice?

Probably, we can try and see.

> >
> >> +
> >> +static int vhost_vdpa_get_msi_cookie(struct iommu_domain *domain, struct device *dma_dev)
> >> +{
> >> +       struct list_head dev_resv_regions;
> >> +       phys_addr_t resv_msi_base = 0;
> >> +       int ret = 0;
> >> +
> >> +       INIT_LIST_HEAD(&dev_resv_regions);
> >> +       iommu_get_resv_regions(dma_dev, &dev_resv_regions);
> >> +
> >> +       if (vhost_vdpa_check_sw_msi(&dev_resv_regions, &resv_msi_base))
> >> +               ret = iommu_get_msi_cookie(domain, resv_msi_base);
> >> +
> >> +       iommu_put_resv_regions(dma_dev, &dev_resv_regions);
> >> +
> >> +       return ret;
> >> +}
> >> +
> >>   static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
> >>   {
> >>          struct vdpa_device *vdpa = v->vdpa;
> >> @@ -1128,11 +1170,16 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
> >>
> >>          ret = iommu_attach_device(v->domain, dma_dev);
> >>          if (ret)
> >> -               goto err_attach;
> >> +               goto err_alloc_domain;
> >>
> >> -       return 0;
> >> +       ret = vhost_vdpa_get_msi_cookie(v->domain, dma_dev);
> > Do we need to check the overlap mapping and record it in the interval
> > tree (as what VFIO did)?
> >
> > Thanks
> Yes, we need to care about this part, I will handle this recently.
> Thanks a lot.

I think we probably also need this for parents that require
vendor-specific mapping logic.

But this could be added on top (probably via a new config op).

Thanks

> >> +       if (ret)
> >> +               goto err_attach_device;
> >>
> >> -err_attach:
> >> +       return 0;
> >> +err_attach_device:
> >> +       iommu_detach_device(v->domain, dma_dev);
> >> +err_alloc_domain:
> >>          iommu_domain_free(v->domain);
> >>          return ret;
> >>   }
> >> --
> >> 2.25.1
> >>
> > .
>
diff mbox series

Patch

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index de91dd88705b..f6c65d5d8e2b 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2623,6 +2623,7 @@  void iommu_get_resv_regions(struct device *dev, struct list_head *list)
 	if (ops->get_resv_regions)
 		ops->get_resv_regions(dev, list);
 }
+EXPORT_SYMBOL_GPL(iommu_get_resv_regions);
 
 /**
  * iommu_put_resv_regions - release resered regions
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index ec32f785dfde..31d3e9ed4cfa 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -1103,6 +1103,48 @@  static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
 	return vhost_chr_write_iter(dev, from);
 }
 
+static bool vhost_vdpa_check_sw_msi(struct list_head *dev_resv_regions, phys_addr_t *base)
+{
+	struct iommu_resv_region *region;
+	bool ret = false;
+
+	list_for_each_entry(region, dev_resv_regions, list) {
+		/*
+		 * The presence of any 'real' MSI regions should take
+		 * precedence over the software-managed one if the
+		 * IOMMU driver happens to advertise both types.
+		 */
+		if (region->type == IOMMU_RESV_MSI) {
+			ret = false;
+			break;
+		}
+
+		if (region->type == IOMMU_RESV_SW_MSI) {
+			*base = region->start;
+			ret = true;
+		}
+	}
+
+	return ret;
+}
+
+static int vhost_vdpa_get_msi_cookie(struct iommu_domain *domain, struct device *dma_dev)
+{
+	struct list_head dev_resv_regions;
+	phys_addr_t resv_msi_base = 0;
+	int ret = 0;
+
+	INIT_LIST_HEAD(&dev_resv_regions);
+	iommu_get_resv_regions(dma_dev, &dev_resv_regions);
+
+	if (vhost_vdpa_check_sw_msi(&dev_resv_regions, &resv_msi_base))
+		ret = iommu_get_msi_cookie(domain, resv_msi_base);
+
+	iommu_put_resv_regions(dma_dev, &dev_resv_regions);
+
+	return ret;
+}
+
 static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
 {
 	struct vdpa_device *vdpa = v->vdpa;
@@ -1128,11 +1170,16 @@  static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
 
 	ret = iommu_attach_device(v->domain, dma_dev);
 	if (ret)
-		goto err_attach;
+		goto err_alloc_domain;
 
-	return 0;
+	ret = vhost_vdpa_get_msi_cookie(v->domain, dma_dev);
+	if (ret)
+		goto err_attach_device;
 
-err_attach:
+	return 0;
+err_attach_device:
+	iommu_detach_device(v->domain, dma_dev);
+err_alloc_domain:
 	iommu_domain_free(v->domain);
 	return ret;
 }