Message ID | 20230519203223.2777255-4-jacob.jun.pan@linux.intel.com (mailing list archive) |
---|---|
State | Superseded |
Series | Re-enable IDXD kernel workqueue under DMA API |
On Fri, May 19, 2023 at 01:32:22PM -0700, Jacob Pan wrote:

> @@ -4720,25 +4762,99 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
>  static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
>  {
>  	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
> +	struct dev_pasid_info *curr, *dev_pasid = NULL;
> +	struct dmar_domain *dmar_domain;
>  	struct iommu_domain *domain;
> +	unsigned long flags;
>
> -	/* Domain type specific cleanup: */
>  	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
> -	if (domain) {
> -		switch (domain->type) {
> -		case IOMMU_DOMAIN_SVA:
> -			intel_svm_remove_dev_pasid(dev, pasid);
> -			break;
> -		default:
> -			/* should never reach here */
> -			WARN_ON(1);
> +	if (!domain)
> +		goto out_tear_down;
> +
> +	/*
> +	 * The SVA implementation needs to stop mm notification, drain the
> +	 * pending page fault requests before tearing down the pasid entry.
> +	 * The VT-d spec (section 6.2.3.1) also recommends that software
> +	 * could use a reserved domain id for all first-only and pass-through
> +	 * translations. Hence there's no need to call domain_detach_iommu()
> +	 * in the sva domain case.
> +	 */
> +	if (domain->type == IOMMU_DOMAIN_SVA) {
> +		intel_svm_remove_dev_pasid(dev, pasid);
> +		goto out_tear_down;
> +	}

But why don't you need to do all the other
intel_pasid_tear_down_entry(), intel_svm_drain_prq() (which is
misnamed) and other stuff from intel_svm_remove_dev_pasid()?

There still seems to be waaay too much "SVM" in the PASID code.

> +static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
> +				     struct device *dev, ioasid_t pasid)
> +{
> +	struct device_domain_info *info = dev_iommu_priv_get(dev);
> +	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> +	struct intel_iommu *iommu = info->iommu;
> +	struct dev_pasid_info *dev_pasid;
> +	unsigned long flags;
> +	int ret;
> +
> +	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
> +		return -EOPNOTSUPP;
> +
> +	if (context_copied(iommu, info->bus, info->devfn))
> +		return -EBUSY;
> +
> +	ret = prepare_domain_attach_device(domain, dev);
> +	if (ret)
> +		return ret;
> +
> +	dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
> +	if (!dev_pasid)
> +		return -ENOMEM;
> +
> +	ret = domain_attach_iommu(dmar_domain, iommu);
> +	if (ret)
> +		goto out_free;
> +
> +	if (domain_type_is_si(dmar_domain))
> +		ret = intel_pasid_setup_pass_through(iommu, dmar_domain,
> +						     dev, pasid);
> +	else if (dmar_domain->use_first_level)
> +		ret = domain_setup_first_level(iommu, dmar_domain,
> +					       dev, pasid);
> +	else
> +		ret = intel_pasid_setup_second_level(iommu, dmar_domain,
> +						     dev, pasid);

It would be nice if the different domain types had their own ops..

Jason
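To make the suggestion concrete: the three-way ladder above could collapse into a single indirect call if each domain type carried its own setup hook. A minimal sketch, assuming a hypothetical intel_domain_ops structure (none of these names exist in the posted series; the three setup functions are the real ones from the patch):

	/* Hypothetical per-domain-type hook; illustrative only. */
	struct intel_domain_ops {
		int (*setup_dev_pasid)(struct intel_iommu *iommu,
				       struct dmar_domain *domain,
				       struct device *dev, ioasid_t pasid);
	};

	/* Each domain type would select its table once, at allocation time: */
	static const struct intel_domain_ops si_domain_ops = {
		.setup_dev_pasid = intel_pasid_setup_pass_through,
	};
	static const struct intel_domain_ops fl_domain_ops = {
		.setup_dev_pasid = domain_setup_first_level,
	};
	static const struct intel_domain_ops sl_domain_ops = {
		.setup_dev_pasid = intel_pasid_setup_second_level,
	};

	/* ...and the if/else ladder in intel_iommu_set_dev_pasid() reduces to: */
	ret = dmar_domain->domain_ops->setup_dev_pasid(iommu, dmar_domain,
						       dev, pasid);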
On 5/30/23 3:48 AM, Jason Gunthorpe wrote:
> On Fri, May 19, 2023 at 01:32:22PM -0700, Jacob Pan wrote:
>
>> @@ -4720,25 +4762,99 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
>>  static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
>>  {
>>  	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
>> +	struct dev_pasid_info *curr, *dev_pasid = NULL;
>> +	struct dmar_domain *dmar_domain;
>>  	struct iommu_domain *domain;
>> +	unsigned long flags;
>>
>> -	/* Domain type specific cleanup: */
>>  	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
>> -	if (domain) {
>> -		switch (domain->type) {
>> -		case IOMMU_DOMAIN_SVA:
>> -			intel_svm_remove_dev_pasid(dev, pasid);
>> -			break;
>> -		default:
>> -			/* should never reach here */
>> -			WARN_ON(1);
>> +	if (!domain)
>> +		goto out_tear_down;
>> +
>> +	/*
>> +	 * The SVA implementation needs to stop mm notification, drain the
>> +	 * pending page fault requests before tearing down the pasid entry.
>> +	 * The VT-d spec (section 6.2.3.1) also recommends that software
>> +	 * could use a reserved domain id for all first-only and pass-through
>> +	 * translations. Hence there's no need to call domain_detach_iommu()
>> +	 * in the sva domain case.
>> +	 */
>> +	if (domain->type == IOMMU_DOMAIN_SVA) {
>> +		intel_svm_remove_dev_pasid(dev, pasid);
>> +		goto out_tear_down;
>> +	}
>
> But why don't you need to do all the other
> intel_pasid_tear_down_entry(), intel_svm_drain_prq() (which is
> misnamed) and other stuff from intel_svm_remove_dev_pasid()?

Perhaps,

	if (domain->type == IOMMU_DOMAIN_SVA) {
		intel_svm_remove_dev_pasid(dev, pasid);
		return;
	}

?

> There still seems to be waaay too much "SVM" in the PASID code.

This segment of code is destined to be temporary. From a long-term
perspective, I hope to move SVA-specific stuff such as mm notification,
prq draining, etc. to the iommu core. They are generic rather than
Intel iommu specific. After the code consolidation is done, the code
here will become simpler and more appealing. We just need to tear down
the pasid entry.

>> +static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
>> +				     struct device *dev, ioasid_t pasid)
>> +{
>> +	struct device_domain_info *info = dev_iommu_priv_get(dev);
>> +	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
>> +	struct intel_iommu *iommu = info->iommu;
>> +	struct dev_pasid_info *dev_pasid;
>> +	unsigned long flags;
>> +	int ret;
>> +
>> +	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
>> +		return -EOPNOTSUPP;
>> +
>> +	if (context_copied(iommu, info->bus, info->devfn))
>> +		return -EBUSY;
>> +
>> +	ret = prepare_domain_attach_device(domain, dev);
>> +	if (ret)
>> +		return ret;
>> +
>> +	dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
>> +	if (!dev_pasid)
>> +		return -ENOMEM;
>> +
>> +	ret = domain_attach_iommu(dmar_domain, iommu);
>> +	if (ret)
>> +		goto out_free;
>> +
>> +	if (domain_type_is_si(dmar_domain))
>> +		ret = intel_pasid_setup_pass_through(iommu, dmar_domain,
>> +						     dev, pasid);
>> +	else if (dmar_domain->use_first_level)
>> +		ret = domain_setup_first_level(iommu, dmar_domain,
>> +					       dev, pasid);
>> +	else
>> +		ret = intel_pasid_setup_second_level(iommu, dmar_domain,
>> +						     dev, pasid);
>
> It would be nice if the different domain types had their own ops..

Good suggestion!

We can add a domain ops in the Intel domain structure which is
responsible for how to install an Intel iommu domain onto the VT-d
hardware. It's worth a separate refactoring series. Let me do it
afterward.

Best regards,
baolu
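For readers tracking the drain question: the SVA-side teardown that intel_svm_remove_dev_pasid() performs today lives in drivers/iommu/intel/svm.c, and in rough outline it is the sequence below. This is a simplified sketch only; the sdev/svm list bookkeeping around it is trimmed, and svm/mm are looked up from the pasid before this point:

	/* Clear the pasid table entry so no new translations start. */
	intel_pasid_tear_down_entry(iommu, dev, pasid, false);
	/* Drain page requests already queued in the PRQ for this pasid. */
	intel_svm_drain_prq(dev, pasid);
	/* When the last device leaves the mm, stop mm notifications. */
	if (list_empty(&svm->devs) && svm->notifier.ops)
		mmu_notifier_unregister(&svm->notifier, mm);

Jason's point is that only the last step is truly SVA-specific; the entry teardown and PRQ drain apply to any pasid attachment.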
On Tue, May 30, 2023 at 10:19:05AM +0800, Baolu Lu wrote:
> On 5/30/23 3:48 AM, Jason Gunthorpe wrote:
> > On Fri, May 19, 2023 at 01:32:22PM -0700, Jacob Pan wrote:
> >
> > > @@ -4720,25 +4762,99 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
> > >  static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
> > >  {
> > >  	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
> > > +	struct dev_pasid_info *curr, *dev_pasid = NULL;
> > > +	struct dmar_domain *dmar_domain;
> > >  	struct iommu_domain *domain;
> > > +	unsigned long flags;
> > >
> > > -	/* Domain type specific cleanup: */
> > >  	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
> > > -	if (domain) {
> > > -		switch (domain->type) {
> > > -		case IOMMU_DOMAIN_SVA:
> > > -			intel_svm_remove_dev_pasid(dev, pasid);
> > > -			break;
> > > -		default:
> > > -			/* should never reach here */
> > > -			WARN_ON(1);
> > > +	if (!domain)
> > > +		goto out_tear_down;
> > > +
> > > +	/*
> > > +	 * The SVA implementation needs to stop mm notification, drain the
> > > +	 * pending page fault requests before tearing down the pasid entry.
> > > +	 * The VT-d spec (section 6.2.3.1) also recommends that software
> > > +	 * could use a reserved domain id for all first-only and pass-through
> > > +	 * translations. Hence there's no need to call domain_detach_iommu()
> > > +	 * in the sva domain case.
> > > +	 */
> > > +	if (domain->type == IOMMU_DOMAIN_SVA) {
> > > +		intel_svm_remove_dev_pasid(dev, pasid);
> > > +		goto out_tear_down;
> > > +	}
> >
> > But why don't you need to do all the other
> > intel_pasid_tear_down_entry(), intel_svm_drain_prq() (which is
> > misnamed) and other stuff from intel_svm_remove_dev_pasid()?
>
> Perhaps,
>
> 	if (domain->type == IOMMU_DOMAIN_SVA) {
> 		intel_svm_remove_dev_pasid(dev, pasid);
> 		return;
> 	}
>
> ?

I would expect only stuff directly connected to SVM to be in the SVM
function.

De-initializing PRI and any other pasid destruction should be in this
function.

> > There still seems to be waaay too much "SVM" in the PASID code.
>
> This segment of code is destined to be temporary. From a long-term
> perspective, I hope to move SVA-specific stuff such as mm notification,
> prq draining, etc. to the iommu core. They are generic rather than
> Intel iommu specific.

Yes, sort of, but.. that is just the mmu notifier bits.

All the PRI/PASID teardown needs to be unlinked from SVM.

> > It would be nice if the different domain types had their own ops..
>
> Good suggestion!
>
> We can add a domain ops in the Intel domain structure which is
> responsible for how to install an Intel iommu domain onto the VT-d
> hardware.

We should have separate iommu_domain_ops at least, I think that would
cover a lot of it?

Jason
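For reference, the core already dispatches set_dev_pasid through per-domain ops, and the Intel SVA path installs its own table, so the split Jason asks about would follow an existing shape. The first block below reflects the pattern in drivers/iommu/intel/svm.c around this series' baseline; the second is a hypothetical sketch (intel_fl_set_dev_pasid is a made-up name):

	/* Existing pattern: SVA domains carry their own iommu_domain_ops. */
	const struct iommu_domain_ops intel_svm_domain_ops = {
		.set_dev_pasid	= intel_svm_set_dev_pasid,
		.free		= intel_svm_domain_free,
	};

	/* Hypothetical: a dedicated table per paging-domain flavour. */
	static const struct iommu_domain_ops intel_fl_domain_ops = {
		.set_dev_pasid	= intel_fl_set_dev_pasid,	/* made-up name */
		.attach_dev	= intel_iommu_attach_device,
		.map_pages	= intel_iommu_map_pages,
		.unmap_pages	= intel_iommu_unmap_pages,
	};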
On 5/31/23 12:55 AM, Jason Gunthorpe wrote:
> On Tue, May 30, 2023 at 10:19:05AM +0800, Baolu Lu wrote:
>> On 5/30/23 3:48 AM, Jason Gunthorpe wrote:
>>> On Fri, May 19, 2023 at 01:32:22PM -0700, Jacob Pan wrote:
>>>
>>>> @@ -4720,25 +4762,99 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
>>>>  static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
>>>>  {
>>>>  	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
>>>> +	struct dev_pasid_info *curr, *dev_pasid = NULL;
>>>> +	struct dmar_domain *dmar_domain;
>>>>  	struct iommu_domain *domain;
>>>> +	unsigned long flags;
>>>>
>>>> -	/* Domain type specific cleanup: */
>>>>  	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
>>>> -	if (domain) {
>>>> -		switch (domain->type) {
>>>> -		case IOMMU_DOMAIN_SVA:
>>>> -			intel_svm_remove_dev_pasid(dev, pasid);
>>>> -			break;
>>>> -		default:
>>>> -			/* should never reach here */
>>>> -			WARN_ON(1);
>>>> +	if (!domain)
>>>> +		goto out_tear_down;
>>>> +
>>>> +	/*
>>>> +	 * The SVA implementation needs to stop mm notification, drain the
>>>> +	 * pending page fault requests before tearing down the pasid entry.
>>>> +	 * The VT-d spec (section 6.2.3.1) also recommends that software
>>>> +	 * could use a reserved domain id for all first-only and pass-through
>>>> +	 * translations. Hence there's no need to call domain_detach_iommu()
>>>> +	 * in the sva domain case.
>>>> +	 */
>>>> +	if (domain->type == IOMMU_DOMAIN_SVA) {
>>>> +		intel_svm_remove_dev_pasid(dev, pasid);
>>>> +		goto out_tear_down;
>>>> +	}
>>>
>>> But why don't you need to do all the other
>>> intel_pasid_tear_down_entry(), intel_svm_drain_prq() (which is
>>> misnamed) and other stuff from intel_svm_remove_dev_pasid()?
>>
>> Perhaps,
>>
>> 	if (domain->type == IOMMU_DOMAIN_SVA) {
>> 		intel_svm_remove_dev_pasid(dev, pasid);
>> 		return;
>> 	}
>>
>> ?
>
> I would expect only stuff directly connected to SVM to be in the SVM
> function.
>
> De-initializing PRI and any other pasid destruction should be in this
> function.
>
>>> There still seems to be waaay too much "SVM" in the PASID code.
>>
>> This segment of code is destined to be temporary. From a long-term
>> perspective, I hope to move SVA-specific stuff such as mm notification,
>> prq draining, etc. to the iommu core. They are generic rather than
>> Intel iommu specific.
>
> Yes, sort of, but.. that is just the mmu notifier bits.
>
> All the PRI/PASID teardown needs to be unlinked from SVM.

Get your point now. Yes, PRI and PASID teardown are not SVA-specific.
Sorry, we should rename SVM to SVA to unify the Linux terminology.

>>> It would be nice if the different domain types had their own ops..
>>
>> Good suggestion!
>>
>> We can add a domain ops in the Intel domain structure which is
>> responsible for how to install an Intel iommu domain onto the VT-d
>> hardware.
>
> We should have separate iommu_domain_ops at least, I think that would
> cover a lot of it?

Are you suggesting adding these ops to the common iommu_domain or to
Intel's dmar_domain? My understanding is the latter. To do so, we
probably need to define various callbacks for different types of
domains: identity, blocking, dma remapping, sva and possibly nested.
We also need to care about legacy vs. scalable mode. That's the reason
why I hoped to do all of this in a separate series, with careful review
and testing.

Best regards,
baolu
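On the "common iommu_domain or Intel's dmar_domain" question: the common struct already carries a per-domain ops pointer, which is what makes per-type ops tables possible without VT-d-specific plumbing. Abbreviated from include/linux/iommu.h around this series' baseline (fault-handling and cookie fields trimmed):

	struct iommu_domain {
		unsigned type;
		const struct iommu_domain_ops *ops;	/* per domain, not per driver */
		unsigned long pgsize_bitmap;
		struct iommu_domain_geometry geometry;
		/* ... remaining fields trimmed ... */
	};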
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5b4ec7cfc1a4..f5f9ad8953cc 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1367,6 +1367,7 @@ domain_lookup_dev_info(struct dmar_domain *domain,
 
 static void domain_update_iotlb(struct dmar_domain *domain)
 {
+	struct dev_pasid_info *dev_pasid;
 	struct device_domain_info *info;
 	bool has_iotlb_device = false;
 	unsigned long flags;
@@ -1378,6 +1379,14 @@ static void domain_update_iotlb(struct dmar_domain *domain)
 			break;
 		}
 	}
+
+	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
+		info = dev_iommu_priv_get(dev_pasid->dev);
+		if (info->ats_enabled) {
+			has_iotlb_device = true;
+			break;
+		}
+	}
 	domain->has_iotlb_device = has_iotlb_device;
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -1463,6 +1472,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 				  u64 addr, unsigned mask)
 {
+	struct dev_pasid_info *dev_pasid;
 	struct device_domain_info *info;
 	unsigned long flags;
 
@@ -1472,6 +1482,37 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 	spin_lock_irqsave(&domain->lock, flags);
 	list_for_each_entry(info, &domain->devices, link)
 		__iommu_flush_dev_iotlb(info, addr, mask);
+
+	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
+		info = dev_iommu_priv_get(dev_pasid->dev);
+		qi_flush_dev_iotlb_pasid(info->iommu,
+					 PCI_DEVID(info->bus, info->devfn),
+					 info->pfsid, dev_pasid->pasid,
+					 info->ats_qdep, addr,
+					 mask);
+	}
+	spin_unlock_irqrestore(&domain->lock, flags);
+}
+
+/*
+ * The VT-d spec requires to use PASID-based-IOTLB Invalidation to
+ * invalidate IOTLB and the paging-structure-caches for a first-stage
+ * page table.
+ */
+static void domain_flush_pasid_iotlb(struct intel_iommu *iommu,
+				     struct dmar_domain *domain, u64 addr,
+				     unsigned long npages, bool ih)
+{
+	u16 did = domain_id_iommu(domain, iommu);
+	struct dev_pasid_info *dev_pasid;
+	unsigned long flags;
+
+	spin_lock_irqsave(&domain->lock, flags);
+	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain)
+		qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih);
+
+	if (!list_empty(&domain->devices))
+		qi_flush_piotlb(iommu, did, IOMMU_DEF_RID_PASID, addr, npages, ih);
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
@@ -1492,7 +1533,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
 		ih = 1 << 6;
 
 	if (domain->use_first_level) {
-		qi_flush_piotlb(iommu, did, IOMMU_DEF_RID_PASID, addr, pages, ih);
+		domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih);
 	} else {
 		unsigned long bitmask = aligned_pages - 1;
 
@@ -1562,7 +1603,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain)
 		u16 did = domain_id_iommu(dmar_domain, iommu);
 
 		if (dmar_domain->use_first_level)
-			qi_flush_piotlb(iommu, did, IOMMU_DEF_RID_PASID, 0, -1, 0);
+			domain_flush_pasid_iotlb(iommu, dmar_domain, 0, -1, 0);
 		else
 			iommu->flush.flush_iotlb(iommu, did, 0, 0,
 						 DMA_TLB_DSI_FLUSH);
@@ -1734,6 +1775,7 @@ static struct dmar_domain *alloc_domain(unsigned int type)
 		domain->use_first_level = true;
 	domain->has_iotlb_device = false;
 	INIT_LIST_HEAD(&domain->devices);
+	INIT_LIST_HEAD(&domain->dev_pasids);
 	spin_lock_init(&domain->lock);
 	xa_init(&domain->iommu_array);
 
@@ -4720,25 +4762,99 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
 static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
 {
 	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
+	struct dev_pasid_info *curr, *dev_pasid = NULL;
+	struct dmar_domain *dmar_domain;
 	struct iommu_domain *domain;
+	unsigned long flags;
 
-	/* Domain type specific cleanup: */
 	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
-	if (domain) {
-		switch (domain->type) {
-		case IOMMU_DOMAIN_SVA:
-			intel_svm_remove_dev_pasid(dev, pasid);
-			break;
-		default:
-			/* should never reach here */
-			WARN_ON(1);
+	if (!domain)
+		goto out_tear_down;
+
+	/*
+	 * The SVA implementation needs to stop mm notification, drain the
+	 * pending page fault requests before tearing down the pasid entry.
+	 * The VT-d spec (section 6.2.3.1) also recommends that software
+	 * could use a reserved domain id for all first-only and pass-through
+	 * translations. Hence there's no need to call domain_detach_iommu()
+	 * in the sva domain case.
+	 */
+	if (domain->type == IOMMU_DOMAIN_SVA) {
+		intel_svm_remove_dev_pasid(dev, pasid);
+		goto out_tear_down;
+	}
+
+	dmar_domain = to_dmar_domain(domain);
+	spin_lock_irqsave(&dmar_domain->lock, flags);
+	list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) {
+		if (curr->dev == dev && curr->pasid == pasid) {
+			list_del(&curr->link_domain);
+			dev_pasid = curr;
 			break;
 		}
 	}
+	spin_unlock_irqrestore(&dmar_domain->lock, flags);
 
+	domain_detach_iommu(dmar_domain, iommu);
+	kfree(dev_pasid);
+out_tear_down:
 	intel_pasid_tear_down_entry(iommu, dev, pasid, false);
 }
 
+static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
+				     struct device *dev, ioasid_t pasid)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	struct intel_iommu *iommu = info->iommu;
+	struct dev_pasid_info *dev_pasid;
+	unsigned long flags;
+	int ret;
+
+	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
+		return -EOPNOTSUPP;
+
+	if (context_copied(iommu, info->bus, info->devfn))
+		return -EBUSY;
+
+	ret = prepare_domain_attach_device(domain, dev);
+	if (ret)
+		return ret;
+
+	dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
+	if (!dev_pasid)
+		return -ENOMEM;
+
+	ret = domain_attach_iommu(dmar_domain, iommu);
+	if (ret)
+		goto out_free;
+
+	if (domain_type_is_si(dmar_domain))
+		ret = intel_pasid_setup_pass_through(iommu, dmar_domain,
+						     dev, pasid);
+	else if (dmar_domain->use_first_level)
+		ret = domain_setup_first_level(iommu, dmar_domain,
+					       dev, pasid);
+	else
+		ret = intel_pasid_setup_second_level(iommu, dmar_domain,
+						     dev, pasid);
+	if (ret)
+		goto out_detach_iommu;
+
+	dev_pasid->dev = dev;
+	dev_pasid->pasid = pasid;
+	spin_lock_irqsave(&dmar_domain->lock, flags);
+	list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
+	spin_unlock_irqrestore(&dmar_domain->lock, flags);
+
+	return 0;
+out_detach_iommu:
+	domain_detach_iommu(dmar_domain, iommu);
+out_free:
+	kfree(dev_pasid);
+	return ret;
+}
+
 const struct iommu_ops intel_iommu_ops = {
 	.capable = intel_iommu_capable,
 	.domain_alloc = intel_iommu_domain_alloc,
@@ -4758,6 +4874,7 @@ const struct iommu_ops intel_iommu_ops = {
 #endif
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev = intel_iommu_attach_device,
+		.set_dev_pasid = intel_iommu_set_dev_pasid,
 		.map_pages = intel_iommu_map_pages,
 		.unmap_pages = intel_iommu_unmap_pages,
 		.iotlb_sync_map = intel_iommu_iotlb_sync_map,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 1c5e1d88862b..30c30e00fbdf 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -595,6 +595,7 @@ struct dmar_domain {
 
 	spinlock_t lock;		/* Protect device tracking lists */
 	struct list_head devices;	/* all devices' list */
+	struct list_head dev_pasids;	/* all attached pasids */
 
 	struct dma_pte	*pgd;		/* virtual address */
 	int		gaw;		/* max guest address width */
@@ -717,6 +718,12 @@ struct device_domain_info {
 	struct pasid_table *pasid_table; /* pasid table */
 };
 
+struct dev_pasid_info {
+	struct list_head link_domain;	/* link to domain siblings */
+	struct device *dev;		/* the physical device */
+	ioasid_t pasid;			/* PASID of the physical device */
+};
+
 static inline void __iommu_flush_cache(
 	struct intel_iommu *iommu, void *addr, int size)
 {
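For context on how this plumbing gets exercised: per the series title, the goal is to let a kernel driver such as idxd attach its default DMA domain to a PASID, which reaches the new set_dev_pasid op through the core API. A caller-side sketch under that assumption (error handling trimmed; the actual idxd call site is not part of this patch):

	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
	int ret;

	/* Routes to intel_iommu_set_dev_pasid() via default_domain_ops. */
	ret = iommu_attach_device_pasid(domain, dev, pasid);
	if (ret)
		return ret;

	/* ...program the device to tag its DMA with this pasid... */

	/* Teardown ends up in intel_iommu_remove_dev_pasid(). */
	iommu_detach_device_pasid(domain, dev, pasid);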