[v7,3/4] iommu/vt-d: Add set_dev_pasid callback for dma domain

Message ID 20230523173451.2932113-4-jacob.jun.pan@linux.intel.com (mailing list archive)
State Superseded
Series Re-enable IDXD kernel workqueue under DMA API

Commit Message

Jacob Pan May 23, 2023, 5:34 p.m. UTC
From: Lu Baolu <baolu.lu@linux.intel.com>

This allows the upper layers to set a domain to a PASID of a device
if the PASID feature is supported by the IOMMU hardware. Typical use
cases include kernel DMA with PASID and hardware-assisted mediated
device drivers.

The attached device and pasid information is tracked in a per-domain
list and is used for IOTLB and devTLB invalidation.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
---
 drivers/iommu/intel/iommu.c | 139 +++++++++++++++++++++++++++++++++---
 drivers/iommu/intel/iommu.h |   7 ++
 2 files changed, 135 insertions(+), 11 deletions(-)
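
For context, a minimal sketch (not part of this patch; the helper name is
hypothetical) of how an upper layer could exercise the new callback through
the core API, where iommu_attach_device_pasid() ends up invoking the
set_dev_pasid op:

    #include <linux/iommu.h>

    /* Route DMA tagged with @pasid through the device's existing DMA
     * domain; error handling trimmed for brevity.
     */
    static int example_attach_dma_pasid(struct device *dev, ioasid_t pasid)
    {
            struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

            if (!domain)
                    return -ENODEV;

            return iommu_attach_device_pasid(domain, dev, pasid);
    }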

Comments

Tian, Kevin May 25, 2023, 6:56 a.m. UTC | #1
> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Sent: Wednesday, May 24, 2023 1:35 AM
> 
> @@ -1472,6 +1482,37 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
>  	spin_lock_irqsave(&domain->lock, flags);
>  	list_for_each_entry(info, &domain->devices, link)
>  		__iommu_flush_dev_iotlb(info, addr, mask);
> +
> +	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
> +		info = dev_iommu_priv_get(dev_pasid->dev);
> +		qi_flush_dev_iotlb_pasid(info->iommu,
> +					 PCI_DEVID(info->bus, info->devfn),
> +					 info->pfsid, dev_pasid->pasid,
> +					 info->ats_qdep, addr,
> +					 mask);
> +	}

Check info->ats_enabled instead of doing it blindly.
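
For illustration, the loop with that check folded in might read (untested
sketch based on the hunk above):

    list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
            info = dev_iommu_priv_get(dev_pasid->dev);
            /* skip devices that do not have ATS enabled */
            if (!info->ats_enabled)
                    continue;
            qi_flush_dev_iotlb_pasid(info->iommu,
                                     PCI_DEVID(info->bus, info->devfn),
                                     info->pfsid, dev_pasid->pasid,
                                     info->ats_qdep, addr, mask);
    }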

> +static void domain_flush_pasid_iotlb(struct intel_iommu *iommu,
> +				     struct dmar_domain *domain, u64 addr,
> +				     unsigned long npages, bool ih)
> +{
> +	u16 did = domain_id_iommu(domain, iommu);
> +	struct dev_pasid_info *dev_pasid;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&domain->lock, flags);
> +	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain)
> +		qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih);
> +
> +	if (!list_empty(&domain->devices))
> +		qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih);

Old code doesn't have this empty list check. I'm not sure whether any
corner case might exist, but if you do plan to add it, it's better to put it
in a separate patch to allow bisect.

>  	spin_unlock_irqrestore(&domain->lock, flags);
>  }
> 
> @@ -1492,7 +1533,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
>  		ih = 1 << 6;
> 
>  	if (domain->use_first_level) {
> -		qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, pages, ih);
> +		domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih);
>  	} else {
>  		unsigned long bitmask = aligned_pages - 1;
> 

Why can't this pasid be used with a second-level config?

> @@ -4720,25 +4762,99 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
>  static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
>  {
>  	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
> +	struct dev_pasid_info *curr, *dev_pasid = NULL;
> +	struct dmar_domain *dmar_domain;
>  	struct iommu_domain *domain;
> +	unsigned long flags;
> 
> -	/* Domain type specific cleanup: */
>  	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
> -	if (domain) {
> -		switch (domain->type) {
> -		case IOMMU_DOMAIN_SVA:
> -			intel_svm_remove_dev_pasid(dev, pasid);
> -			break;
> -		default:
> -			/* should never reach here */
> -			WARN_ON(1);
> +	if (!domain)
> +		goto out_tear_down;
> +
> +	/*
> +	 * The SVA implementation needs to stop mm notification, drain the
> +	 * pending page fault requests before tearing down the pasid entry.
> +	 * The VT-d spec (section 6.2.3.1) also recommends that software
> +	 * could use a reserved domain id for all first-only and pass-through
> +	 * translations. Hence there's no need to call domain_detach_iommu()
> +	 * in the sva domain case.
> +	 */

It's probably clearer to say:

/*
 * SVA domain requires special treatment before tearing down the pasid
 * entry:
 *   1) pasid is stored in mm instead of in dev_pasid;
 *   2) all SVA domains share a reserved domain id per recommendation
 *      from VT-d spec (section 6.2.3.1) so domain_detach_iommu() is
 *      not required;
 *   3) additional cleanup is required e.g. stopping mm notification,
 *      draining the pending page fault requests, etc.
 * Better handle it in a separate helper.
 */

> 
> +static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
> +				     struct device *dev, ioasid_t pasid)
> +{
> +	struct device_domain_info *info = dev_iommu_priv_get(dev);
> +	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> +	struct intel_iommu *iommu = info->iommu;
> +	struct dev_pasid_info *dev_pasid;
> +	unsigned long flags;
> +	int ret;
> +
> +	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
> +		return -EOPNOTSUPP;
> +
> +	if (context_copied(iommu, info->bus, info->devfn))
> +		return -EBUSY;
> +
> +	ret = prepare_domain_attach_device(domain, dev);
> +	if (ret)
> +		return ret;
> +
> +	dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
> +	if (!dev_pasid)
> +		return -ENOMEM;

should it check whether this pasid has been attached?

> +
> +	ret = domain_attach_iommu(dmar_domain, iommu);
> +	if (ret)
> +		goto out_free;
> +
> +	if (domain_type_is_si(dmar_domain))
> +		ret = intel_pasid_setup_pass_through(iommu, dmar_domain,
> +						     dev, pasid);
> +	else if (dmar_domain->use_first_level)
> +		ret = domain_setup_first_level(iommu, dmar_domain,
> +					       dev, pasid);
> +	else
> +		ret = intel_pasid_setup_second_level(iommu, dmar_domain,
> +						     dev, pasid);

Here you allow attaching pasid to a domain using second-level but all
prior changes are only for first-level.
Baolu Lu May 26, 2023, 2:43 a.m. UTC | #2
On 5/25/23 2:56 PM, Tian, Kevin wrote:
>> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Sent: Wednesday, May 24, 2023 1:35 AM
>>
>> @@ -1472,6 +1482,37 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
>>   	spin_lock_irqsave(&domain->lock, flags);
>>   	list_for_each_entry(info, &domain->devices, link)
>>   		__iommu_flush_dev_iotlb(info, addr, mask);
>> +
>> +	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
>> +		info = dev_iommu_priv_get(dev_pasid->dev);
>> +		qi_flush_dev_iotlb_pasid(info->iommu,
>> +					 PCI_DEVID(info->bus, info->devfn),
>> +					 info->pfsid, dev_pasid->pasid,
>> +					 info->ats_qdep, addr,
>> +					 mask);
>> +	}
> 
> Check info->ats_enabled instead of doing it blindly.

Yeah!

> 
>> +static void domain_flush_pasid_iotlb(struct intel_iommu *iommu,
>> +				     struct dmar_domain *domain, u64 addr,
>> +				     unsigned long npages, bool ih)
>> +{
>> +	u16 did = domain_id_iommu(domain, iommu);
>> +	struct dev_pasid_info *dev_pasid;
>> +	unsigned long flags;
>> +
>> +	spin_lock_irqsave(&domain->lock, flags);
>> +	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain)
>> +		qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih);
>> +
>> +	if (!list_empty(&domain->devices))
>> +		qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih);
> 
> Old code doesn't have this empty list check. I'm not sure whether any
> corner case might exist, but if you do plan to add it, it's better to put it
> in a separate patch to allow bisect.

Sure. Better to do it in a separate refactoring patch.

> 
>>   	spin_unlock_irqrestore(&domain->lock, flags);
>>   }
>>
>> @@ -1492,7 +1533,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
>>   		ih = 1 << 6;
>>
>>   	if (domain->use_first_level) {
>> -		qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, pages, ih);
>> +		domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih);
>>   	} else {
>>   		unsigned long bitmask = aligned_pages - 1;
>>
> 
> Why can't this pasid be used with a second-level config?

Perhaps I didn't get you correctly.

PASID-based IOTLB invalidation is only for the first level.

Spec 6.5.2.4:

The PASID-based-IOTLB Invalidate Descriptor (p_iotlb_inv_dsc) allows
software to invalidate IOTLB and the paging-structure-caches. This
descriptor is expected to be used when software has changed
first-stage tables and wants to invalidate affected cache entries.

IOTLB invalidation is for second level. See spec 6.5.2.3.
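
In the driver this distinction maps to two different flush calls, roughly
(simplified from iommu_flush_iotlb_psi() and the patch below):

    if (domain->use_first_level)
            /* first-stage: PASID-based-IOTLB invalidation (6.5.2.4) */
            qi_flush_piotlb(iommu, did, pasid, addr, npages, ih);
    else
            /* second-stage: IOTLB invalidation (6.5.2.3) */
            iommu->flush.flush_iotlb(iommu, did, addr, mask,
                                     DMA_TLB_PSI_FLUSH);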

> 
>> @@ -4720,25 +4762,99 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
>>   static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
>>   {
>>   	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
>> +	struct dev_pasid_info *curr, *dev_pasid = NULL;
>> +	struct dmar_domain *dmar_domain;
>>   	struct iommu_domain *domain;
>> +	unsigned long flags;
>>
>> -	/* Domain type specific cleanup: */
>>   	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
>> -	if (domain) {
>> -		switch (domain->type) {
>> -		case IOMMU_DOMAIN_SVA:
>> -			intel_svm_remove_dev_pasid(dev, pasid);
>> -			break;
>> -		default:
>> -			/* should never reach here */
>> -			WARN_ON(1);
>> +	if (!domain)
>> +		goto out_tear_down;
>> +
>> +	/*
>> +	 * The SVA implementation needs to stop mm notification, drain the
>> +	 * pending page fault requests before tearing down the pasid entry.
>> +	 * The VT-d spec (section 6.2.3.1) also recommends that software
>> +	 * could use a reserved domain id for all first-only and pass-through
>> +	 * translations. Hence there's no need to call domain_detach_iommu()
>> +	 * in the sva domain case.
>> +	 */
> 
> It's probably clearer to say:
> 
> /*
>   * SVA domain requires special treatment before tearing down the pasid
>   * entry:
>   *   1) pasid is stored in mm instead of in dev_pasid;
>   *   2) all SVA domains share a reserved domain id per recommendation
>   *      from VT-d spec (section 6.2.3.1) so domain_detach_iommu() is
>   *      not required;
>   *   3) additional cleanup is required e.g. stopping mm notification,
>   *      draining the pending page fault requests, etc.
>   * Better handle it in a separate helper.
>   */

It's better.

>>
>> +static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
>> +				     struct device *dev, ioasid_t pasid)
>> +{
>> +	struct device_domain_info *info = dev_iommu_priv_get(dev);
>> +	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
>> +	struct intel_iommu *iommu = info->iommu;
>> +	struct dev_pasid_info *dev_pasid;
>> +	unsigned long flags;
>> +	int ret;
>> +
>> +	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
>> +		return -EOPNOTSUPP;
>> +
>> +	if (context_copied(iommu, info->bus, info->devfn))
>> +		return -EBUSY;
>> +
>> +	ret = prepare_domain_attach_device(domain, dev);
>> +	if (ret)
>> +		return ret;
>> +
>> +	dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
>> +	if (!dev_pasid)
>> +		return -ENOMEM;
> 
> should it check whether this pasid has been attached?

It has already been checked by iommu_attach_device_pasid() in the core.
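
For reference, the core-side guard looks roughly like this (simplified from
iommu_attach_device_pasid() in drivers/iommu/iommu.c of this era):

    /* A second attach of the same pasid fails with -EBUSY before the
     * driver's set_dev_pasid callback is ever invoked.
     */
    curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
    if (curr) {
            ret = xa_err(curr) ? : -EBUSY;
            goto out_unlock;
    }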

> 
>> +
>> +	ret = domain_attach_iommu(dmar_domain, iommu);
>> +	if (ret)
>> +		goto out_free;
>> +
>> +	if (domain_type_is_si(dmar_domain))
>> +		ret = intel_pasid_setup_pass_through(iommu, dmar_domain,
>> +						     dev, pasid);
>> +	else if (dmar_domain->use_first_level)
>> +		ret = domain_setup_first_level(iommu, dmar_domain,
>> +					       dev, pasid);
>> +	else
>> +		ret = intel_pasid_setup_second_level(iommu, dmar_domain,
>> +						     dev, pasid);
> 
> Here you allow attaching pasid to a domain using second-level but all
> prior changes are only for first-level.

As explained, the prior changes are for pasid-based iotlb invalidation on
first-level page table changes. Or perhaps I didn't get you correctly?

Best regards,
baolu
Jason Gunthorpe May 30, 2023, 5:08 p.m. UTC | #3
> > +	/*
> > +	 * The SVA implementation needs to stop mm notification, drain the
> > +	 * pending page fault requests before tearing down the pasid entry.
> > +	 * The VT-d spec (section 6.2.3.1) also recommends that software
> > +	 * could use a reserved domain id for all first-only and pass-through
> > +	 * translations. Hence there's no need to call domain_detach_iommu()
> > +	 * in the sva domain case.
> > +	 */
> 
> It's probably clearer to say:

Is this what is going on?

> /*
>  * SVA domain requires special treatment before tearing down the pasid
>  * entry:
>  *   1) pasid is stored in mm instead of in dev_pasid;

Why? The mm pasid should not be used by any driver code; the PASID the
SVA is hooked to does NOT have to be the mm PASID.

>  *   2) all SVA domains share a reserved domain id per recommendation
>  *      from VT-d spec (section 6.2.3.1) so domain_detach_iommu() is
>  *      not required;

The DID should be managed and allocated for the S2 pointer and the
flushing logic should work generically by tracking the S2s being used
and flushing their DIDs when all the S2s fall out of use. The special
identity S2 just gets a static DID that never falls out of use.

Jason
Baolu Lu May 31, 2023, 4:59 a.m. UTC | #4
On 5/31/23 1:08 AM, Jason Gunthorpe wrote:
>>> +	/*
>>> +	 * The SVA implementation needs to stop mm notification, drain the
>>> +	 * pending page fault requests before tearing down the pasid entry.
>>> +	 * The VT-d spec (section 6.2.3.1) also recommends that software
>>> +	 * could use a reserved domain id for all first-only and pass-through
>>> +	 * translations. Hence there's no need to call domain_detach_iommu()
>>> +	 * in the sva domain case.
>>> +	 */
>>
>> It's probably clearer to say:
> 
> Is this what is going on ??
> 
>> /*
>>   * SVA domain requires special treatment before tearing down the pasid
>>   * entry:
>>   *   1) pasid is stored in mm instead of in dev_pasid;
> 
> Why? The mm pasid should not be used by any driver code; the PASID the
> SVA is hooked to does NOT have to be the mm PASID.

Yes. The individual driver should not be aware of mm->pasid. The
set/remove_dev_pasid callbacks already take the pasid as an input.

> 
>>   *   2) all SVA domains share a reserved domain id per recommendation
>>   *      from VT-d spec (section 6.2.3.1) so domain_detach_iommu() is
>>   *      not required;
> 
> The DID should be managed and allocated for the S2 pointer and the
> flushing logic should work generically by tracking the S2s being used
> and flushing their DIDs when all the S2s fall out of use. The special
> identity S2 just gets a static DID that never falls out of use.

The DID logic in this case is not about invalidating the DID. Instead,
it is about managing the DID's reference count to track its allocation
and release. The reference count is increased every time the DID is
assigned to hardware, and it is decreased when the DID is removed from
hardware. The DID is released when the count hits 0.

Some DIDs are reserved for special domains, like the identity domain and
SVA domains. For those DIDs, there's no need for a reference count, hence
no need to call the helper.

To simplify the code, perhaps we can enhance the helpers to take no action
for those special DIDs.
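
A rough sketch of the refcount pattern being described, with illustrative
names rather than the actual driver code:

    /* attach: the first user of (domain, iommu) allocates a DID */
    info = xa_load(&domain->iommu_array, iommu->seq_id);
    if (info) {
            info->refcnt++;         /* DID already allocated, just count */
            return 0;
    }
    /* ... allocate a DID, set info->refcnt = 1, xa_store() ... */

    /* detach: the last user releases the DID */
    if (--info->refcnt == 0) {
            /* ... free the DID and xa_erase() the entry ... */
    }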

Best regards,
baolu
Tian, Kevin June 14, 2023, 8:10 a.m. UTC | #5
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Friday, May 26, 2023 10:43 AM
> 
> >
> >> +
> >> +	ret = domain_attach_iommu(dmar_domain, iommu);
> >> +	if (ret)
> >> +		goto out_free;
> >> +
> >> +	if (domain_type_is_si(dmar_domain))
> >> +		ret = intel_pasid_setup_pass_through(iommu, dmar_domain,
> >> +						     dev, pasid);
> >> +	else if (dmar_domain->use_first_level)
> >> +		ret = domain_setup_first_level(iommu, dmar_domain,
> >> +					       dev, pasid);
> >> +	else
> >> +		ret = intel_pasid_setup_second_level(iommu, dmar_domain,
> >> +						     dev, pasid);
> >
> > Here you allow attaching pasid to a domain using second-level but all
> > prior changes are only for first-level.
> 
> As explained, the prior changes are for pasid-based iotlb invalidation on
> first-level page table changes. Or perhaps I didn't get you correctly?
> 

No, you are correct.
Tian, Kevin June 14, 2023, 8:13 a.m. UTC | #6
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Wednesday, May 31, 2023 1:09 AM
> 
> 
> >  *   2) all SVA domains share a reserved domain id per recommendation
> >  *      from VT-d spec (section 6.2.3.1) so domain_detach_iommu() is
> >  *      not required;
> 
> The DID should be managed and allocated for the S2 pointer and the
> flushing logic should work generically by tracking the S2s being used
> and flushing their DIDs when all the S2s fall out of use. The special
> identity S2 just gets a static DID that never falls out of use.
> 

I'm not sure it's worthwhile to fake an identity S2 for the bare-metal
SVA case. It's far simpler to special-case the reserved DID value in
the SVM path.

Patch

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 4eba9973f537..6386f7e4da04 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1367,6 +1367,7 @@  domain_lookup_dev_info(struct dmar_domain *domain,
 
 static void domain_update_iotlb(struct dmar_domain *domain)
 {
+	struct dev_pasid_info *dev_pasid;
 	struct device_domain_info *info;
 	bool has_iotlb_device = false;
 	unsigned long flags;
@@ -1378,6 +1379,14 @@  static void domain_update_iotlb(struct dmar_domain *domain)
 			break;
 		}
 	}
+
+	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
+		info = dev_iommu_priv_get(dev_pasid->dev);
+		if (info->ats_enabled) {
+			has_iotlb_device = true;
+			break;
+		}
+	}
 	domain->has_iotlb_device = has_iotlb_device;
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -1463,6 +1472,7 @@  static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 				  u64 addr, unsigned mask)
 {
+	struct dev_pasid_info *dev_pasid;
 	struct device_domain_info *info;
 	unsigned long flags;
 
@@ -1472,6 +1482,37 @@  static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 	spin_lock_irqsave(&domain->lock, flags);
 	list_for_each_entry(info, &domain->devices, link)
 		__iommu_flush_dev_iotlb(info, addr, mask);
+
+	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
+		info = dev_iommu_priv_get(dev_pasid->dev);
+		qi_flush_dev_iotlb_pasid(info->iommu,
+					 PCI_DEVID(info->bus, info->devfn),
+					 info->pfsid, dev_pasid->pasid,
+					 info->ats_qdep, addr,
+					 mask);
+	}
+	spin_unlock_irqrestore(&domain->lock, flags);
+}
+
+/*
+ * The VT-d spec requires using PASID-based-IOTLB Invalidation to
+ * invalidate IOTLB and the paging-structure-caches for a first-stage
+ * page table.
+ */
+static void domain_flush_pasid_iotlb(struct intel_iommu *iommu,
+				     struct dmar_domain *domain, u64 addr,
+				     unsigned long npages, bool ih)
+{
+	u16 did = domain_id_iommu(domain, iommu);
+	struct dev_pasid_info *dev_pasid;
+	unsigned long flags;
+
+	spin_lock_irqsave(&domain->lock, flags);
+	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain)
+		qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih);
+
+	if (!list_empty(&domain->devices))
+		qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih);
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
@@ -1492,7 +1533,7 @@  static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
 		ih = 1 << 6;
 
 	if (domain->use_first_level) {
-		qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, pages, ih);
+		domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih);
 	} else {
 		unsigned long bitmask = aligned_pages - 1;
 
@@ -1562,7 +1603,7 @@  static void intel_flush_iotlb_all(struct iommu_domain *domain)
 		u16 did = domain_id_iommu(dmar_domain, iommu);
 
 		if (dmar_domain->use_first_level)
-			qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, 0, -1, 0);
+			domain_flush_pasid_iotlb(iommu, dmar_domain, 0, -1, 0);
 		else
 			iommu->flush.flush_iotlb(iommu, did, 0, 0,
 						 DMA_TLB_DSI_FLUSH);
@@ -1734,6 +1775,7 @@  static struct dmar_domain *alloc_domain(unsigned int type)
 		domain->use_first_level = true;
 	domain->has_iotlb_device = false;
 	INIT_LIST_HEAD(&domain->devices);
+	INIT_LIST_HEAD(&domain->dev_pasids);
 	spin_lock_init(&domain->lock);
 	xa_init(&domain->iommu_array);
 
@@ -4720,25 +4762,99 @@  static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
 static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
 {
 	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
+	struct dev_pasid_info *curr, *dev_pasid = NULL;
+	struct dmar_domain *dmar_domain;
 	struct iommu_domain *domain;
+	unsigned long flags;
 
-	/* Domain type specific cleanup: */
 	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
-	if (domain) {
-		switch (domain->type) {
-		case IOMMU_DOMAIN_SVA:
-			intel_svm_remove_dev_pasid(dev, pasid);
-			break;
-		default:
-			/* should never reach here */
-			WARN_ON(1);
+	if (!domain)
+		goto out_tear_down;
+
+	/*
+	 * The SVA implementation needs to stop mm notification, drain the
+	 * pending page fault requests before tearing down the pasid entry.
+	 * The VT-d spec (section 6.2.3.1) also recommends that software
+	 * could use a reserved domain id for all first-only and pass-through
+	 * translations. Hence there's no need to call domain_detach_iommu()
+	 * in the sva domain case.
+	 */
+	if (domain->type == IOMMU_DOMAIN_SVA) {
+		intel_svm_remove_dev_pasid(dev, pasid);
+		goto out_tear_down;
+	}
+
+	dmar_domain = to_dmar_domain(domain);
+	spin_lock_irqsave(&dmar_domain->lock, flags);
+	list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) {
+		if (curr->dev == dev && curr->pasid == pasid) {
+			list_del(&curr->link_domain);
+			dev_pasid = curr;
 			break;
 		}
 	}
+	spin_unlock_irqrestore(&dmar_domain->lock, flags);
 
+	domain_detach_iommu(dmar_domain, iommu);
+	kfree(dev_pasid);
+out_tear_down:
 	intel_pasid_tear_down_entry(iommu, dev, pasid, false);
 }
 
+static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
+				     struct device *dev, ioasid_t pasid)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	struct intel_iommu *iommu = info->iommu;
+	struct dev_pasid_info *dev_pasid;
+	unsigned long flags;
+	int ret;
+
+	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
+		return -EOPNOTSUPP;
+
+	if (context_copied(iommu, info->bus, info->devfn))
+		return -EBUSY;
+
+	ret = prepare_domain_attach_device(domain, dev);
+	if (ret)
+		return ret;
+
+	dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
+	if (!dev_pasid)
+		return -ENOMEM;
+
+	ret = domain_attach_iommu(dmar_domain, iommu);
+	if (ret)
+		goto out_free;
+
+	if (domain_type_is_si(dmar_domain))
+		ret = intel_pasid_setup_pass_through(iommu, dmar_domain,
+						     dev, pasid);
+	else if (dmar_domain->use_first_level)
+		ret = domain_setup_first_level(iommu, dmar_domain,
+					       dev, pasid);
+	else
+		ret = intel_pasid_setup_second_level(iommu, dmar_domain,
+						     dev, pasid);
+	if (ret)
+		goto out_detach_iommu;
+
+	dev_pasid->dev = dev;
+	dev_pasid->pasid = pasid;
+	spin_lock_irqsave(&dmar_domain->lock, flags);
+	list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
+	spin_unlock_irqrestore(&dmar_domain->lock, flags);
+
+	return 0;
+out_detach_iommu:
+	domain_detach_iommu(dmar_domain, iommu);
+out_free:
+	kfree(dev_pasid);
+	return ret;
+}
+
 const struct iommu_ops intel_iommu_ops = {
 	.capable		= intel_iommu_capable,
 	.domain_alloc		= intel_iommu_domain_alloc,
@@ -4758,6 +4874,7 @@  const struct iommu_ops intel_iommu_ops = {
 #endif
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev		= intel_iommu_attach_device,
+		.set_dev_pasid		= intel_iommu_set_dev_pasid,
 		.map_pages		= intel_iommu_map_pages,
 		.unmap_pages		= intel_iommu_unmap_pages,
 		.iotlb_sync_map		= intel_iommu_iotlb_sync_map,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 1c5e1d88862b..30c30e00fbdf 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -595,6 +595,7 @@  struct dmar_domain {
 
 	spinlock_t lock;		/* Protect device tracking lists */
 	struct list_head devices;	/* all devices' list */
+	struct list_head dev_pasids;	/* all attached pasids */
 
 	struct dma_pte	*pgd;		/* virtual address */
 	int		gaw;		/* max guest address width */
@@ -717,6 +718,12 @@  struct device_domain_info {
 	struct pasid_table *pasid_table; /* pasid table */
 };
 
+struct dev_pasid_info {
+	struct list_head link_domain;	/* link to domain siblings */
+	struct device *dev;		/* the physical device */
+	ioasid_t pasid;			/* PASID of the physical device */
+};
+
 static inline void __iommu_flush_cache(
 	struct intel_iommu *iommu, void *addr, int size)
 {