diff mbox series

[v2,05/17] iommufd: Keep track of each device's reserved regions instead of groups

Message ID 5-v2-51b9896e7862+8a8c-iommufd_alloc_jgg@nvidia.com (mailing list archive)
State New
Headers show
Series Add iommufd physical device operations for replace and alloc hwpt | expand

Commit Message

Jason Gunthorpe March 8, 2023, 12:35 a.m. UTC
The driver facing API in the iommu core makes the reserved regions
per-device. An algorithm in the core code consolidates the regions of all
the devices in a group to return the group view.

To allow for devices to be hotplugged into the group iommufd would re-load
the entire group's reserved regions for each device, just in case they
changed.

Further iommufd already has to deal with duplicated/overlapping reserved
regions as it must union all the groups together.

Thus simplify all of this to just use the device reserved regions
interface directly from the iommu driver.

Suggested-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/device.c          |  5 ++---
 drivers/iommu/iommufd/io_pagetable.c    | 25 ++++++++++---------------
 drivers/iommu/iommufd/iommufd_private.h |  7 +++----
 3 files changed, 15 insertions(+), 22 deletions(-)

Comments

Baolu Lu March 8, 2023, 12:32 p.m. UTC | #1
On 2023/3/8 8:35, Jason Gunthorpe wrote:
> The driver facing API in the iommu core makes the reserved regions
> per-device. An algorithm in the core code consolidates the regions of all
> the devices in a group to return the group view.
> 
> To allow for devices to be hotplugged into the group iommufd would re-load
> the entire group's reserved regions for each device, just in case they
> changed.
> 
> Further iommufd already has to deal with duplicated/overlapping reserved
> regions as it must union all the groups together.
> 
> Thus simplify all of this to just use the device reserved regions
> interface directly from the iommu driver.
> 
> Suggested-by: Kevin Tian <kevin.tian@intel.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>   drivers/iommu/iommufd/device.c          |  5 ++---
>   drivers/iommu/iommufd/io_pagetable.c    | 25 ++++++++++---------------
>   drivers/iommu/iommufd/iommufd_private.h |  7 +++----
>   3 files changed, 15 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> index a4bf4c5826ded2..b546250dd1e226 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -310,9 +310,8 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
>   		}
>   	}
>   
> -	rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev,
> -						   idev->igroup->group,
> -						   &sw_msi_start);
> +	rc = iopt_table_enforce_dev_resv_regions(
> +		&hwpt->ioas->iopt, idev->dev, &sw_msi_start);
>   	if (rc)
>   		return rc;
>   
> diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
> index e0ae72b9e67f86..427f0cc0f07955 100644
> --- a/drivers/iommu/iommufd/io_pagetable.c
> +++ b/drivers/iommu/iommufd/io_pagetable.c
> @@ -1162,24 +1162,21 @@ void iopt_remove_access(struct io_pagetable *iopt,
>   }
>   
>   /* Narrow the valid_iova_itree to include reserved ranges from a group. */
> -int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
> -					  struct device *device,
> -					  struct iommu_group *group,
> -					  phys_addr_t *sw_msi_start)
> +int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
> +					struct device *dev,
> +					phys_addr_t *sw_msi_start)
>   {
>   	struct iommu_resv_region *resv;
> -	struct iommu_resv_region *tmp;
> -	LIST_HEAD(group_resv_regions);
> +	LIST_HEAD(resv_regions);
>   	unsigned int num_hw_msi = 0;
>   	unsigned int num_sw_msi = 0;
>   	int rc;
>   
>   	down_write(&iopt->iova_rwsem);
> -	rc = iommu_get_group_resv_regions(group, &group_resv_regions);
> -	if (rc)
> -		goto out_unlock;
> +	/* FIXME: drivers allocate memory but there is no failure propogated */
> +	iommu_get_resv_regions(dev, &resv_regions);
>   
> -	list_for_each_entry(resv, &group_resv_regions, list) {
> +	list_for_each_entry(resv, &resv_regions, list) {
>   		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
>   			continue;
>   
> @@ -1191,7 +1188,7 @@ int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
>   		}
>   
>   		rc = iopt_reserve_iova(iopt, resv->start,
> -				       resv->length - 1 + resv->start, device);
> +				       resv->length - 1 + resv->start, dev);
>   		if (rc)
>   			goto out_reserved;
>   	}
> @@ -1206,11 +1203,9 @@ int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
>   	goto out_free_resv;
>   
>   out_reserved:
> -	__iopt_remove_reserved_iova(iopt, device);
> +	__iopt_remove_reserved_iova(iopt, dev);
>   out_free_resv:
> -	list_for_each_entry_safe(resv, tmp, &group_resv_regions, list)
> -		kfree(resv);
> -out_unlock:
> +	iommu_put_resv_regions(dev, &resv_regions);
>   	up_write(&iopt->iova_rwsem);
>   	return rc;
>   }
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index 2ff192777f27d3..22863759c3bfb0 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -76,10 +76,9 @@ int iopt_table_add_domain(struct io_pagetable *iopt,
>   			  struct iommu_domain *domain);
>   void iopt_table_remove_domain(struct io_pagetable *iopt,
>   			      struct iommu_domain *domain);
> -int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
> -					  struct device *device,
> -					  struct iommu_group *group,
> -					  phys_addr_t *sw_msi_start);
> +int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
> +					struct device *dev,
> +					phys_addr_t *sw_msi_start);
>   int iopt_set_allow_iova(struct io_pagetable *iopt,
>   			struct rb_root_cached *allowed_iova);
>   int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,

Perhaps a silly question. The reserved regions are enforced in IOVA when
a device is added to the igroup's device list. Should it be released
after the device is removed from the list?

This may not be appropriate because the devices may share some common
reserved regions, such as the IOMMU_RESV_MSI.

If that's the case,

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>

Best regards,
baolu
Jason Gunthorpe March 8, 2023, 3:02 p.m. UTC | #2
On Wed, Mar 08, 2023 at 08:32:03PM +0800, Baolu Lu wrote:
> > diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> > index 2ff192777f27d3..22863759c3bfb0 100644
> > --- a/drivers/iommu/iommufd/iommufd_private.h
> > +++ b/drivers/iommu/iommufd/iommufd_private.h
> > @@ -76,10 +76,9 @@ int iopt_table_add_domain(struct io_pagetable *iopt,
> >   			  struct iommu_domain *domain);
> >   void iopt_table_remove_domain(struct io_pagetable *iopt,
> >   			      struct iommu_domain *domain);
> > -int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
> > -					  struct device *device,
> > -					  struct iommu_group *group,
> > -					  phys_addr_t *sw_msi_start);
> > +int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
> > +					struct device *dev,
> > +					phys_addr_t *sw_msi_start);
> >   int iopt_set_allow_iova(struct io_pagetable *iopt,
> >   			struct rb_root_cached *allowed_iova);
> >   int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
> 
> Perhaps a silly question. The reserved regions are enforced in IOVA when
> a device is added to the igroup's device list. Should it be released
> after the device is removed from the list?

Yes, it is, iommufd_hw_pagetable_detach() does it right after the
list_del()

> This may not be appropriate because the devices may share some common
> reserved regions, such as the IOMMU_RESV_MSI.

Common reserved regions are duplicated for every device just as they
were duplicated for every group.

The duplicates are keyed with an owner that is equal to the 'struct
device *' so we only remove the IOVA specific to the struct device.

The interval tree effectively unions the duplicated and overlapping
IOVA regions during lookup.

It is done this way to avoid memory allocation on destruction
paths. We never have to chop up merged IOVA regions or something to
remove a device.

Jason
Baolu Lu March 9, 2023, 1:43 a.m. UTC | #3
On 3/8/23 11:02 PM, Jason Gunthorpe wrote:
> On Wed, Mar 08, 2023 at 08:32:03PM +0800, Baolu Lu wrote:
>>> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
>>> index 2ff192777f27d3..22863759c3bfb0 100644
>>> --- a/drivers/iommu/iommufd/iommufd_private.h
>>> +++ b/drivers/iommu/iommufd/iommufd_private.h
>>> @@ -76,10 +76,9 @@ int iopt_table_add_domain(struct io_pagetable *iopt,
>>>    			  struct iommu_domain *domain);
>>>    void iopt_table_remove_domain(struct io_pagetable *iopt,
>>>    			      struct iommu_domain *domain);
>>> -int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
>>> -					  struct device *device,
>>> -					  struct iommu_group *group,
>>> -					  phys_addr_t *sw_msi_start);
>>> +int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
>>> +					struct device *dev,
>>> +					phys_addr_t *sw_msi_start);
>>>    int iopt_set_allow_iova(struct io_pagetable *iopt,
>>>    			struct rb_root_cached *allowed_iova);
>>>    int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
>>
>> Perhaps a silly question. The reserved regions are enforced in IOVA when
>> a device is added to the igroup's device list. Should it be released
>> after the device is removed from the list?
> 
> Yes, it is, iommufd_hw_pagetable_detach() does it right after the
> list_del()
> 
>> This may not be appropriate because the devices may share some common
>> reserved regions, such as the IOMMU_RESV_MSI.
> 
> Common reserved regions are duplicated for every device just as they
> were duplicated for every group.
> 
> The duplicates are keyed with an owner that is equal to the 'struct
> device *' so we only remove the IOVA specific to the struct device.
> 
> The interval tree effectively unions the duplicated and overlapping
> IOVA regions during lookup.
> 
> It is done this way to avoid memory allocation on destruction
> paths. We never have to chop up merged IOVA regions or something to
> remove a device.

Ah! Yes. This has been considered. I should read further into
iopt_reserve_iova() and iopt_remove_reserved_iova(). Thanks for the
explanation.

Best regards,
baolu
Tian, Kevin March 10, 2023, 10:42 a.m. UTC | #4
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Wednesday, March 8, 2023 8:36 AM
> 
> 
>  /* Narrow the valid_iova_itree to include reserved ranges from a group. */

s/group/device/

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
diff mbox series

Patch

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index a4bf4c5826ded2..b546250dd1e226 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -310,9 +310,8 @@  int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 		}
 	}
 
-	rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev,
-						   idev->igroup->group,
-						   &sw_msi_start);
+	rc = iopt_table_enforce_dev_resv_regions(
+		&hwpt->ioas->iopt, idev->dev, &sw_msi_start);
 	if (rc)
 		return rc;
 
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index e0ae72b9e67f86..427f0cc0f07955 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -1162,24 +1162,21 @@  void iopt_remove_access(struct io_pagetable *iopt,
 }
 
 /* Narrow the valid_iova_itree to include reserved ranges from a device. */
-int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
-					  struct device *device,
-					  struct iommu_group *group,
-					  phys_addr_t *sw_msi_start)
+int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
+					struct device *dev,
+					phys_addr_t *sw_msi_start)
 {
 	struct iommu_resv_region *resv;
-	struct iommu_resv_region *tmp;
-	LIST_HEAD(group_resv_regions);
+	LIST_HEAD(resv_regions);
 	unsigned int num_hw_msi = 0;
 	unsigned int num_sw_msi = 0;
 	int rc;
 
 	down_write(&iopt->iova_rwsem);
-	rc = iommu_get_group_resv_regions(group, &group_resv_regions);
-	if (rc)
-		goto out_unlock;
+	/* FIXME: drivers allocate memory but there is no failure propagated */
+	iommu_get_resv_regions(dev, &resv_regions);
 
-	list_for_each_entry(resv, &group_resv_regions, list) {
+	list_for_each_entry(resv, &resv_regions, list) {
 		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
 			continue;
 
@@ -1191,7 +1188,7 @@  int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
 		}
 
 		rc = iopt_reserve_iova(iopt, resv->start,
-				       resv->length - 1 + resv->start, device);
+				       resv->length - 1 + resv->start, dev);
 		if (rc)
 			goto out_reserved;
 	}
@@ -1206,11 +1203,9 @@  int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
 	goto out_free_resv;
 
 out_reserved:
-	__iopt_remove_reserved_iova(iopt, device);
+	__iopt_remove_reserved_iova(iopt, dev);
 out_free_resv:
-	list_for_each_entry_safe(resv, tmp, &group_resv_regions, list)
-		kfree(resv);
-out_unlock:
+	iommu_put_resv_regions(dev, &resv_regions);
 	up_write(&iopt->iova_rwsem);
 	return rc;
 }
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 2ff192777f27d3..22863759c3bfb0 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -76,10 +76,9 @@  int iopt_table_add_domain(struct io_pagetable *iopt,
 			  struct iommu_domain *domain);
 void iopt_table_remove_domain(struct io_pagetable *iopt,
 			      struct iommu_domain *domain);
-int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
-					  struct device *device,
-					  struct iommu_group *group,
-					  phys_addr_t *sw_msi_start);
+int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
+					struct device *dev,
+					phys_addr_t *sw_msi_start);
 int iopt_set_allow_iova(struct io_pagetable *iopt,
 			struct rb_root_cached *allowed_iova);
 int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,