Message ID | 20230407180554.2784285-6-jacob.jun.pan@linux.intel.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Re-enable IDXD kernel workqueue under DMA API | expand |
On 4/8/23 2:05 AM, Jacob Pan wrote: > @@ -2429,10 +2475,11 @@ static int __init si_domain_init(int hw) > return 0; > } > > -static int dmar_domain_attach_device(struct dmar_domain *domain, > - struct device *dev) > +static int dmar_domain_attach_device_pasid(struct dmar_domain *domain, > + struct device *dev, ioasid_t pasid) > { > struct device_domain_info *info = dev_iommu_priv_get(dev); > + struct device_pasid_info *dev_pasid; > struct intel_iommu *iommu; > unsigned long flags; > u8 bus, devfn; > @@ -2442,43 +2489,57 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, > if (!iommu) > return -ENODEV; > > + dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); > + if (!dev_pasid) > + return -ENOMEM; > + > ret = domain_attach_iommu(domain, iommu); > if (ret) > - return ret; > + goto exit_free; > + > info->domain = domain; > + dev_pasid->pasid = pasid; > + dev_pasid->dev = dev; > spin_lock_irqsave(&domain->lock, flags); > - list_add(&info->link, &domain->devices); > + if (!info->dev_attached) > + list_add(&info->link, &domain->devices); > + > + list_add(&dev_pasid->link_domain, &domain->dev_pasids); > spin_unlock_irqrestore(&domain->lock, flags); > > /* PASID table is mandatory for a PCI device in scalable mode. 
*/ > if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { > /* Setup the PASID entry for requests without PASID: */ > if (hw_pass_through && domain_type_is_si(domain)) > - ret = intel_pasid_setup_pass_through(iommu, domain, > - dev, PASID_RID2PASID); > + ret = intel_pasid_setup_pass_through(iommu, domain, dev, pasid); > else if (domain->use_first_level) > - ret = domain_setup_first_level(iommu, domain, dev, > - PASID_RID2PASID); > + ret = domain_setup_first_level(iommu, domain, dev, pasid); > else > - ret = intel_pasid_setup_second_level(iommu, domain, > - dev, PASID_RID2PASID); > + ret = intel_pasid_setup_second_level(iommu, domain, dev, pasid); > if (ret) { > - dev_err(dev, "Setup RID2PASID failed\n"); > + dev_err(dev, "Setup PASID %d failed\n", pasid); > device_block_translation(dev); > - return ret; > + goto exit_free; > } > } > + /* device context already activated, we are done */ > + if (info->dev_attached) > + goto exit; > > ret = domain_context_mapping(domain, dev); > if (ret) { > dev_err(dev, "Domain context map failed\n"); > device_block_translation(dev); > - return ret; > + goto exit_free; > } > > iommu_enable_pci_caps(info); > - > + info->dev_attached = 1; > +exit: > return 0; > +exit_free: > + kfree(dev_pasid); > + return ret; > } > > static bool device_has_rmrr(struct device *dev) > @@ -4029,8 +4090,7 @@ static void device_block_translation(struct device *dev) > iommu_disable_pci_caps(info); > if (!dev_is_real_dma_subdevice(dev)) { > if (sm_supported(iommu)) > - intel_pasid_tear_down_entry(iommu, dev, > - PASID_RID2PASID, false); > + intel_iommu_detach_device_pasid(&info->domain->domain, dev, PASID_RID2PASID); > else > domain_context_clear(info); > } > @@ -4040,6 +4100,7 @@ static void device_block_translation(struct device *dev) > > spin_lock_irqsave(&info->domain->lock, flags); > list_del(&info->link); > + info->dev_attached = 0; > spin_unlock_irqrestore(&info->domain->lock, flags); > > domain_detach_iommu(info->domain, iommu); > @@ -4186,7 
+4247,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, > if (ret) > return ret; > > - return dmar_domain_attach_device(to_dmar_domain(domain), dev); > + return dmar_domain_attach_device_pasid(to_dmar_domain(domain), dev, PASID_RID2PASID); > } For VT-d driver, attach_dev and attach_dev_pasid have different meanings. Merging them into one helper may lead to confusion. What do you think of the following code? The dmar_domain_attach_device_pasid() helper could be reused for attach_dev_pasid path. diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7c2f4bd33582..09ae62bc3724 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2434,6 +2434,40 @@ static int __init si_domain_init(int hw) return 0; } + +static int dmar_domain_attach_device_pasid(struct dmar_domain *domain, + struct intel_iommu *iommu, + struct device *dev, ioasid_t pasid) +{ + struct device_pasid_info *dev_pasid; + unsigned long flags; + int ret; + + dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); + if (!dev_pasid) + return -ENOMEM; + + if (hw_pass_through && domain_type_is_si(domain)) + ret = intel_pasid_setup_pass_through(iommu, domain, dev, pasid); + else if (domain->use_first_level) + ret = domain_setup_first_level(iommu, domain, dev, pasid); + else + ret = intel_pasid_setup_second_level(iommu, domain, dev, pasid); + + if (ret) { + kfree(dev_pasid); + return ret; + } + + dev_pasid->pasid = pasid; + dev_pasid->dev = dev; + spin_lock_irqsave(&domain->lock, flags); + list_add(&dev_pasid->link_domain, &domain->dev_pasids); + spin_unlock_irqrestore(&domain->lock, flags); + + return 0; +} + static int dmar_domain_attach_device(struct dmar_domain *domain, struct device *dev) { @@ -2458,15 +2492,8 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, /* PASID table is mandatory for a PCI device in scalable mode. 
*/ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { /* Setup the PASID entry for requests without PASID: */ - if (hw_pass_through && domain_type_is_si(domain)) - ret = intel_pasid_setup_pass_through(iommu, domain, - dev, PASID_RID2PASID); - else if (domain->use_first_level) - ret = domain_setup_first_level(iommu, domain, dev, - PASID_RID2PASID); - else - ret = intel_pasid_setup_second_level(iommu, domain, - dev, PASID_RID2PASID); + ret = dmar_domain_attach_device_pasid(domain, iommu, dev, + PASID_RID2PASID); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); device_block_translation(dev); > > static int intel_iommu_map(struct iommu_domain *domain, > @@ -4675,26 +4736,52 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, > __mapping_notify_one(info->iommu, dmar_domain, pfn, pages); > } > > -static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) > +static void intel_iommu_detach_device_pasid(struct iommu_domain *domain, > + struct device *dev, ioasid_t pasid) > { > - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); > - struct iommu_domain *domain; > + struct device_domain_info *info = dev_iommu_priv_get(dev); > + struct dmar_domain *dmar_domain = to_dmar_domain(domain); > + struct device_pasid_info *i, *dev_pasid = NULL; > + struct intel_iommu *iommu = info->iommu; > + unsigned long flags; > > - /* Domain type specific cleanup: */ > - domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); > - if (domain) { > - switch (domain->type) { > - case IOMMU_DOMAIN_SVA: > - intel_svm_remove_dev_pasid(dev, pasid); > - break; > - default: > - /* should never reach here */ > - WARN_ON(1); > + spin_lock_irqsave(&dmar_domain->lock, flags); > + list_for_each_entry(i, &dmar_domain->dev_pasids, link_domain) { > + if (i->dev == dev && i->pasid == pasid) { > + list_del(&i->link_domain); > + dev_pasid = i; > break; > } > } > + spin_unlock_irqrestore(&dmar_domain->lock, flags); > + if (WARN_ON(!dev_pasid)) > + return; 
> + > + /* PASID entry already cleared during SVA unbind */ > + if (domain->type != IOMMU_DOMAIN_SVA) > + intel_pasid_tear_down_entry(iommu, dev, pasid, false); > + > + kfree(dev_pasid); > +} > + > +static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) > +{ > + struct device_domain_info *info = dev_iommu_priv_get(dev); > + struct dmar_domain *dmar_domain; > + struct iommu_domain *domain; > + > + domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); > + dmar_domain = to_dmar_domain(domain); > + > + /* > + * SVA Domain type specific cleanup: Not ideal but not until we have > + * IOPF capable domain specific ops, we need this special case. > + */ > + if (domain->type == IOMMU_DOMAIN_SVA) > + return intel_svm_remove_dev_pasid(dev, pasid); > > - intel_pasid_tear_down_entry(iommu, dev, pasid, false); > + intel_iommu_detach_device_pasid(domain, dev, pasid); > + domain_detach_iommu(dmar_domain, info->iommu); > } The remove_dev_pasid path need to change only after attach_dev_pasid op is added, right? If so, we should move such change into the next patch. 
> > const struct iommu_ops intel_iommu_ops = { > diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h > index 65b15be72878..b6c26f25d1ba 100644 > --- a/drivers/iommu/intel/iommu.h > +++ b/drivers/iommu/intel/iommu.h > @@ -595,6 +595,7 @@ struct dmar_domain { > > spinlock_t lock; /* Protect device tracking lists */ > struct list_head devices; /* all devices' list */ > + struct list_head dev_pasids; /* all attached pasids */ > > struct dma_pte *pgd; /* virtual address */ > int gaw; /* max guest address width */ > @@ -708,6 +709,7 @@ struct device_domain_info { > u8 ats_supported:1; > u8 ats_enabled:1; > u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */ > + u8 dev_attached:1; /* Device context activated */ > u8 ats_qdep; > struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ > struct intel_iommu *iommu; /* IOMMU used by this device */ > @@ -715,6 +717,12 @@ struct device_domain_info { > struct pasid_table *pasid_table; /* pasid table */ > }; > > +struct device_pasid_info { > + struct list_head link_domain; /* link to domain siblings */ > + struct device *dev; /* physical device derived from */ > + ioasid_t pasid; /* PASID on physical device */ > +}; > + > static inline void __iommu_flush_cache( > struct intel_iommu *iommu, void *addr, int size) > { Best regards, baolu
On 4/10/23 10:46 AM, Baolu Lu wrote: >> @@ -4040,6 +4100,7 @@ static void device_block_translation(struct >> device *dev) >> spin_lock_irqsave(&info->domain->lock, flags); >> list_del(&info->link); >> + info->dev_attached = 0; >> spin_unlock_irqrestore(&info->domain->lock, flags); >> domain_detach_iommu(info->domain, iommu); >> @@ -4186,7 +4247,7 @@ static int intel_iommu_attach_device(struct >> iommu_domain *domain, >> if (ret) >> return ret; >> - return dmar_domain_attach_device(to_dmar_domain(domain), dev); >> + return dmar_domain_attach_device_pasid(to_dmar_domain(domain), >> dev, PASID_RID2PASID); >> } > > For VT-d driver, attach_dev and attach_dev_pasid have different > meanings. Merging them into one helper may lead to confusion. What do > you think of the following code? The dmar_domain_attach_device_pasid() > helper could be reused for attach_dev_pasid path. > > diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c > index 7c2f4bd33582..09ae62bc3724 100644 > --- a/drivers/iommu/intel/iommu.c > +++ b/drivers/iommu/intel/iommu.c > @@ -2434,6 +2434,40 @@ static int __init si_domain_init(int hw) > return 0; > } > > + > +static int dmar_domain_attach_device_pasid(struct dmar_domain *domain, > + struct intel_iommu *iommu, > + struct device *dev, ioasid_t pasid) > +{ > + struct device_pasid_info *dev_pasid; > + unsigned long flags; > + int ret; > + > + dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); > + if (!dev_pasid) > + return -ENOMEM; > + > + if (hw_pass_through && domain_type_is_si(domain)) > + ret = intel_pasid_setup_pass_through(iommu, domain, dev, pasid); > + else if (domain->use_first_level) > + ret = domain_setup_first_level(iommu, domain, dev, pasid); > + else > + ret = intel_pasid_setup_second_level(iommu, domain, dev, pasid); > + > + if (ret) { > + kfree(dev_pasid); > + return ret; > + } > + > + dev_pasid->pasid = pasid; > + dev_pasid->dev = dev; > + spin_lock_irqsave(&domain->lock, flags); > + list_add(&dev_pasid->link_domain, 
&domain->dev_pasids); > + spin_unlock_irqrestore(&domain->lock, flags); > + > + return 0; > +} > + > static int dmar_domain_attach_device(struct dmar_domain *domain, > struct device *dev) > { > @@ -2458,15 +2492,8 @@ static int dmar_domain_attach_device(struct > dmar_domain *domain, > /* PASID table is mandatory for a PCI device in scalable mode. */ > if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { > /* Setup the PASID entry for requests without PASID: */ > - if (hw_pass_through && domain_type_is_si(domain)) > - ret = intel_pasid_setup_pass_through(iommu, domain, > - dev, PASID_RID2PASID); > - else if (domain->use_first_level) > - ret = domain_setup_first_level(iommu, domain, dev, > - PASID_RID2PASID); > - else > - ret = intel_pasid_setup_second_level(iommu, domain, > - dev, PASID_RID2PASID); > + ret = dmar_domain_attach_device_pasid(domain, iommu, dev, > + PASID_RID2PASID); > if (ret) { > dev_err(dev, "Setup RID2PASID failed\n"); > device_block_translation(dev); Sorry! I forgot one thing. The dev_pasid data allocated in attach_dev path should be freed in device_block_translation(). Perhaps we need to add below change? @@ -4107,6 +4134,7 @@ static void device_block_translation(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; + struct device_pasid_info *dev_pasid; unsigned long flags; iommu_disable_pci_caps(info); @@ -4118,6 +4146,16 @@ static void device_block_translation(struct device *dev) domain_context_clear(info); } + spin_lock_irqsave(&info->domain->lock, flags); + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { + if (dev_pasid->dev != dev || dev_pasid->pasid != RID2PASID) + continue; + + list_del(&dev_pasid->link_domain); + kfree(dev_pasid); + } + spin_unlock_irqrestore(&info->domain->lock, flags); + if (!info->domain) return; Best regards, baolu
Hi Baolu, On Mon, 10 Apr 2023 10:46:02 +0800, Baolu Lu <baolu.lu@linux.intel.com> wrote: > On 4/8/23 2:05 AM, Jacob Pan wrote: > > @@ -2429,10 +2475,11 @@ static int __init si_domain_init(int hw) > > return 0; > > } > > > > -static int dmar_domain_attach_device(struct dmar_domain *domain, > > - struct device *dev) > > +static int dmar_domain_attach_device_pasid(struct dmar_domain *domain, > > + struct device *dev, ioasid_t > > pasid) { > > struct device_domain_info *info = dev_iommu_priv_get(dev); > > + struct device_pasid_info *dev_pasid; > > struct intel_iommu *iommu; > > unsigned long flags; > > u8 bus, devfn; > > @@ -2442,43 +2489,57 @@ static int dmar_domain_attach_device(struct > > dmar_domain *domain, if (!iommu) > > return -ENODEV; > > > > + dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); > > + if (!dev_pasid) > > + return -ENOMEM; > > + > > ret = domain_attach_iommu(domain, iommu); > > if (ret) > > - return ret; > > + goto exit_free; > > + > > info->domain = domain; > > + dev_pasid->pasid = pasid; > > + dev_pasid->dev = dev; > > spin_lock_irqsave(&domain->lock, flags); > > - list_add(&info->link, &domain->devices); > > + if (!info->dev_attached) > > + list_add(&info->link, &domain->devices); > > + > > + list_add(&dev_pasid->link_domain, &domain->dev_pasids); > > spin_unlock_irqrestore(&domain->lock, flags); > > > > /* PASID table is mandatory for a PCI device in scalable > > mode. 
*/ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { > > /* Setup the PASID entry for requests without PASID: > > */ if (hw_pass_through && domain_type_is_si(domain)) > > - ret = intel_pasid_setup_pass_through(iommu, > > domain, > > - dev, PASID_RID2PASID); > > + ret = intel_pasid_setup_pass_through(iommu, > > domain, dev, pasid); else if (domain->use_first_level) > > - ret = domain_setup_first_level(iommu, domain, > > dev, > > - PASID_RID2PASID); > > + ret = domain_setup_first_level(iommu, domain, > > dev, pasid); else > > - ret = intel_pasid_setup_second_level(iommu, > > domain, > > - dev, PASID_RID2PASID); > > + ret = intel_pasid_setup_second_level(iommu, > > domain, dev, pasid); if (ret) { > > - dev_err(dev, "Setup RID2PASID failed\n"); > > + dev_err(dev, "Setup PASID %d failed\n", pasid); > > device_block_translation(dev); > > - return ret; > > + goto exit_free; > > } > > } > > + /* device context already activated, we are done */ > > + if (info->dev_attached) > > + goto exit; > > > > ret = domain_context_mapping(domain, dev); > > if (ret) { > > dev_err(dev, "Domain context map failed\n"); > > device_block_translation(dev); > > - return ret; > > + goto exit_free; > > } > > > > iommu_enable_pci_caps(info); > > - > > + info->dev_attached = 1; > > +exit: > > return 0; > > +exit_free: > > + kfree(dev_pasid); > > + return ret; > > } > > > > static bool device_has_rmrr(struct device *dev) > > @@ -4029,8 +4090,7 @@ static void device_block_translation(struct > > device *dev) iommu_disable_pci_caps(info); > > if (!dev_is_real_dma_subdevice(dev)) { > > if (sm_supported(iommu)) > > - intel_pasid_tear_down_entry(iommu, dev, > > - PASID_RID2PASID, > > false); > > + > > intel_iommu_detach_device_pasid(&info->domain->domain, dev, > > PASID_RID2PASID); else domain_context_clear(info); > > } > > @@ -4040,6 +4100,7 @@ static void device_block_translation(struct > > device *dev) > > spin_lock_irqsave(&info->domain->lock, flags); > > list_del(&info->link); > > + 
info->dev_attached = 0; > > spin_unlock_irqrestore(&info->domain->lock, flags); > > > > domain_detach_iommu(info->domain, iommu); > > @@ -4186,7 +4247,7 @@ static int intel_iommu_attach_device(struct > > iommu_domain *domain, if (ret) > > return ret; > > > > - return dmar_domain_attach_device(to_dmar_domain(domain), dev); > > + return dmar_domain_attach_device_pasid(to_dmar_domain(domain), > > dev, PASID_RID2PASID); } > > For VT-d driver, attach_dev and attach_dev_pasid have different > meanings. Merging them into one helper may lead to confusion. What do > you think of the following code? The dmar_domain_attach_device_pasid() > helper could be reused for attach_dev_pasid path. Per our previous discussion https://lore.kernel.org/lkml/ZAY4zd4OlgSz+puZ@nvidia.com/ We wanted to remove the ordering dependency between attaching device and device_pasid. i.e. making the two equal at the IOMMU API level. So from that perspective, attach_dev_pasid will include attach_dev if the device has not been attached. i.e. attach_dev includes setting up the device context and RID_PASID; attach_dev_pasid also includes setting up the device context and another PASID. No ordering requirement. 
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c > index 7c2f4bd33582..09ae62bc3724 100644 > --- a/drivers/iommu/intel/iommu.c > +++ b/drivers/iommu/intel/iommu.c > @@ -2434,6 +2434,40 @@ static int __init si_domain_init(int hw) > return 0; > } > > + > +static int dmar_domain_attach_device_pasid(struct dmar_domain *domain, > + struct intel_iommu *iommu, > + struct device *dev, ioasid_t > pasid) +{ > + struct device_pasid_info *dev_pasid; > + unsigned long flags; > + int ret; > + > + dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); > + if (!dev_pasid) > + return -ENOMEM; > + > + if (hw_pass_through && domain_type_is_si(domain)) > + ret = intel_pasid_setup_pass_through(iommu, domain, dev, > pasid); > + else if (domain->use_first_level) > + ret = domain_setup_first_level(iommu, domain, dev, > pasid); > + else > + ret = intel_pasid_setup_second_level(iommu, domain, dev, > pasid); + > + if (ret) { > + kfree(dev_pasid); > + return ret; > + } > + > + dev_pasid->pasid = pasid; > + dev_pasid->dev = dev; > + spin_lock_irqsave(&domain->lock, flags); > + list_add(&dev_pasid->link_domain, &domain->dev_pasids); > + spin_unlock_irqrestore(&domain->lock, flags); > + > + return 0; > +} > + > static int dmar_domain_attach_device(struct dmar_domain *domain, > struct device *dev) > { > @@ -2458,15 +2492,8 @@ static int dmar_domain_attach_device(struct > dmar_domain *domain, > /* PASID table is mandatory for a PCI device in scalable mode. 
> */ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { > /* Setup the PASID entry for requests without PASID: */ > - if (hw_pass_through && domain_type_is_si(domain)) > - ret = intel_pasid_setup_pass_through(iommu, > domain, > - dev, PASID_RID2PASID); > - else if (domain->use_first_level) > - ret = domain_setup_first_level(iommu, domain, > dev, > - PASID_RID2PASID); > - else > - ret = intel_pasid_setup_second_level(iommu, > domain, > - dev, PASID_RID2PASID); > + ret = dmar_domain_attach_device_pasid(domain, iommu, dev, > + PASID_RID2PASID); > if (ret) { > dev_err(dev, "Setup RID2PASID failed\n"); > device_block_translation(dev); > > > > > static int intel_iommu_map(struct iommu_domain *domain, > > @@ -4675,26 +4736,52 @@ static void intel_iommu_iotlb_sync_map(struct > > iommu_domain *domain, __mapping_notify_one(info->iommu, dmar_domain, > > pfn, pages); } > > > > -static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t > > pasid) +static void intel_iommu_detach_device_pasid(struct iommu_domain > > *domain, > > + struct device *dev, > > ioasid_t pasid) { > > - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); > > - struct iommu_domain *domain; > > + struct device_domain_info *info = dev_iommu_priv_get(dev); > > + struct dmar_domain *dmar_domain = to_dmar_domain(domain); > > + struct device_pasid_info *i, *dev_pasid = NULL; > > + struct intel_iommu *iommu = info->iommu; > > + unsigned long flags; > > > > - /* Domain type specific cleanup: */ > > - domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); > > - if (domain) { > > - switch (domain->type) { > > - case IOMMU_DOMAIN_SVA: > > - intel_svm_remove_dev_pasid(dev, pasid); > > - break; > > - default: > > - /* should never reach here */ > > - WARN_ON(1); > > + spin_lock_irqsave(&dmar_domain->lock, flags); > > + list_for_each_entry(i, &dmar_domain->dev_pasids, link_domain) { > > + if (i->dev == dev && i->pasid == pasid) { > > + list_del(&i->link_domain); > > + dev_pasid = i; > > 
break; > > } > > } > > + spin_unlock_irqrestore(&dmar_domain->lock, flags); > > + if (WARN_ON(!dev_pasid)) > > + return; > > + > > + /* PASID entry already cleared during SVA unbind */ > > + if (domain->type != IOMMU_DOMAIN_SVA) > > + intel_pasid_tear_down_entry(iommu, dev, pasid, false); > > + > > + kfree(dev_pasid); > > +} > > + > > +static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t > > pasid) +{ > > + struct device_domain_info *info = dev_iommu_priv_get(dev); > > + struct dmar_domain *dmar_domain; > > + struct iommu_domain *domain; > > + > > + domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); > > + dmar_domain = to_dmar_domain(domain); > > + > > + /* > > + * SVA Domain type specific cleanup: Not ideal but not until > > we have > > + * IOPF capable domain specific ops, we need this special case. > > + */ > > + if (domain->type == IOMMU_DOMAIN_SVA) > > + return intel_svm_remove_dev_pasid(dev, pasid); > > > > - intel_pasid_tear_down_entry(iommu, dev, pasid, false); > > + intel_iommu_detach_device_pasid(domain, dev, pasid); > > + domain_detach_iommu(dmar_domain, info->iommu); > > } > > The remove_dev_pasid path need to change only after attach_dev_pasid op > is added, right? If so, we should move such change into the next patch. yes, you are right, will do. 
> > > > const struct iommu_ops intel_iommu_ops = { > > diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h > > index 65b15be72878..b6c26f25d1ba 100644 > > --- a/drivers/iommu/intel/iommu.h > > +++ b/drivers/iommu/intel/iommu.h > > @@ -595,6 +595,7 @@ struct dmar_domain { > > > > spinlock_t lock; /* Protect device tracking > > lists */ struct list_head devices; /* all devices' list */ > > + struct list_head dev_pasids; /* all attached pasids */ > > > > struct dma_pte *pgd; /* virtual address > > */ int gaw; /* max guest address width */ > > @@ -708,6 +709,7 @@ struct device_domain_info { > > u8 ats_supported:1; > > u8 ats_enabled:1; > > u8 dtlb_extra_inval:1; /* Quirk for devices need extra > > flush */ > > + u8 dev_attached:1; /* Device context activated */ > > u8 ats_qdep; > > struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ > > struct intel_iommu *iommu; /* IOMMU used by this device */ > > @@ -715,6 +717,12 @@ struct device_domain_info { > > struct pasid_table *pasid_table; /* pasid table */ > > }; > > > > +struct device_pasid_info { > > + struct list_head link_domain; /* link to domain > > siblings */ > > + struct device *dev; /* physical device derived > > from */ > > + ioasid_t pasid; /* PASID on physical > > device */ +}; > > + > > static inline void __iommu_flush_cache( > > struct intel_iommu *iommu, void *addr, int size) > > { > > Best regards, > baolu Thanks, Jacob
On 4/19/23 5:32 AM, Jacob Pan wrote: > On Mon, 10 Apr 2023 10:46:02 +0800, Baolu Lu<baolu.lu@linux.intel.com> > wrote: > >> On 4/8/23 2:05 AM, Jacob Pan wrote: >>> @@ -2429,10 +2475,11 @@ static int __init si_domain_init(int hw) >>> return 0; >>> } >>> >>> -static int dmar_domain_attach_device(struct dmar_domain *domain, >>> - struct device *dev) >>> +static int dmar_domain_attach_device_pasid(struct dmar_domain *domain, >>> + struct device *dev, ioasid_t >>> pasid) { >>> struct device_domain_info *info = dev_iommu_priv_get(dev); >>> + struct device_pasid_info *dev_pasid; >>> struct intel_iommu *iommu; >>> unsigned long flags; >>> u8 bus, devfn; >>> @@ -2442,43 +2489,57 @@ static int dmar_domain_attach_device(struct >>> dmar_domain *domain, if (!iommu) >>> return -ENODEV; >>> >>> + dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); >>> + if (!dev_pasid) >>> + return -ENOMEM; >>> + >>> ret = domain_attach_iommu(domain, iommu); >>> if (ret) >>> - return ret; >>> + goto exit_free; >>> + >>> info->domain = domain; >>> + dev_pasid->pasid = pasid; >>> + dev_pasid->dev = dev; >>> spin_lock_irqsave(&domain->lock, flags); >>> - list_add(&info->link, &domain->devices); >>> + if (!info->dev_attached) >>> + list_add(&info->link, &domain->devices); >>> + >>> + list_add(&dev_pasid->link_domain, &domain->dev_pasids); >>> spin_unlock_irqrestore(&domain->lock, flags); >>> >>> /* PASID table is mandatory for a PCI device in scalable >>> mode. 
*/ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { >>> /* Setup the PASID entry for requests without PASID: >>> */ if (hw_pass_through && domain_type_is_si(domain)) >>> - ret = intel_pasid_setup_pass_through(iommu, >>> domain, >>> - dev, PASID_RID2PASID); >>> + ret = intel_pasid_setup_pass_through(iommu, >>> domain, dev, pasid); else if (domain->use_first_level) >>> - ret = domain_setup_first_level(iommu, domain, >>> dev, >>> - PASID_RID2PASID); >>> + ret = domain_setup_first_level(iommu, domain, >>> dev, pasid); else >>> - ret = intel_pasid_setup_second_level(iommu, >>> domain, >>> - dev, PASID_RID2PASID); >>> + ret = intel_pasid_setup_second_level(iommu, >>> domain, dev, pasid); if (ret) { >>> - dev_err(dev, "Setup RID2PASID failed\n"); >>> + dev_err(dev, "Setup PASID %d failed\n", pasid); >>> device_block_translation(dev); >>> - return ret; >>> + goto exit_free; >>> } >>> } >>> + /* device context already activated, we are done */ >>> + if (info->dev_attached) >>> + goto exit; >>> >>> ret = domain_context_mapping(domain, dev); >>> if (ret) { >>> dev_err(dev, "Domain context map failed\n"); >>> device_block_translation(dev); >>> - return ret; >>> + goto exit_free; >>> } >>> >>> iommu_enable_pci_caps(info); >>> - >>> + info->dev_attached = 1; >>> +exit: >>> return 0; >>> +exit_free: >>> + kfree(dev_pasid); >>> + return ret; >>> } >>> >>> static bool device_has_rmrr(struct device *dev) >>> @@ -4029,8 +4090,7 @@ static void device_block_translation(struct >>> device *dev) iommu_disable_pci_caps(info); >>> if (!dev_is_real_dma_subdevice(dev)) { >>> if (sm_supported(iommu)) >>> - intel_pasid_tear_down_entry(iommu, dev, >>> - PASID_RID2PASID, >>> false); >>> + >>> intel_iommu_detach_device_pasid(&info->domain->domain, dev, >>> PASID_RID2PASID); else domain_context_clear(info); >>> } >>> @@ -4040,6 +4100,7 @@ static void device_block_translation(struct >>> device *dev) >>> spin_lock_irqsave(&info->domain->lock, flags); >>> list_del(&info->link); >>> + 
info->dev_attached = 0; >>> spin_unlock_irqrestore(&info->domain->lock, flags); >>> >>> domain_detach_iommu(info->domain, iommu); >>> @@ -4186,7 +4247,7 @@ static int intel_iommu_attach_device(struct >>> iommu_domain *domain, if (ret) >>> return ret; >>> >>> - return dmar_domain_attach_device(to_dmar_domain(domain), dev); >>> + return dmar_domain_attach_device_pasid(to_dmar_domain(domain), >>> dev, PASID_RID2PASID); } >> For VT-d driver, attach_dev and attach_dev_pasid have different >> meanings. Merging them into one helper may lead to confusion. What do >> you think of the following code? The dmar_domain_attach_device_pasid() >> helper could be reused for attach_dev_pasid path. > Per our previous discussion > https://lore.kernel.org/lkml/ZAY4zd4OlgSz+puZ@nvidia.com/ > We wanted to remove the ordering dependency between attaching device and > device_pasid. i.e. making the two equal at IOMMU API level. Yes. That still holds. > > So from that perspective, attach_dev_pasid will include attach_dev if the > device has not been attached. i.e. I don't follow here. attach_dev and attach_dev_pasid are independent of each other. So in any case, attach_dev_pasid shouldn't include attach_dev. > attach_dev includes set up device context and RID_PASID > attach_dev_pasid also include set up device context and another PASID. I guess that you are worrying about the case where the context entry and pasid table are not set up yet in the attach_dev_pasid path? In theory yes, but it does not exist in reality. The best case is that we set up the context entry in the probe_device path, but at present, perhaps we can simply check and return failure in this case. Anyway, I'd suggest not mixing the two ops in a single function. > > No ordering requirement. > Best regards, baolu
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index cbb2670f88ca..52b9d0d3a02c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -278,6 +278,8 @@ static LIST_HEAD(dmar_satc_units); list_for_each_entry(rmrr, &dmar_rmrr_units, list) static void device_block_translation(struct device *dev); +static void intel_iommu_detach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); static void intel_iommu_domain_free(struct iommu_domain *domain); int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON); @@ -1365,6 +1367,7 @@ domain_lookup_dev_info(struct dmar_domain *domain, static void domain_update_iotlb(struct dmar_domain *domain) { + struct device_pasid_info *dev_pasid; struct device_domain_info *info; bool has_iotlb_device = false; unsigned long flags; @@ -1376,6 +1379,14 @@ static void domain_update_iotlb(struct dmar_domain *domain) break; } } + + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { + info = dev_iommu_priv_get(dev_pasid->dev); + if (info->ats_enabled) { + has_iotlb_device = true; + break; + } + } domain->has_iotlb_device = has_iotlb_device; spin_unlock_irqrestore(&domain->lock, flags); } @@ -1486,6 +1497,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, static void iommu_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, unsigned mask) { + struct device_pasid_info *dev_pasid; struct device_domain_info *info; unsigned long flags; @@ -1495,6 +1507,39 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, spin_lock_irqsave(&domain->lock, flags); list_for_each_entry(info, &domain->devices, link) __iommu_flush_dev_iotlb(info, addr, mask); + + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { + /* device TLB is not aware of the use of RID PASID is for DMA w/o PASID */ + if (dev_pasid->pasid == PASID_RID2PASID) + continue; + + info = dev_iommu_priv_get(dev_pasid->dev); + qi_flush_dev_iotlb_pasid(info->iommu, 
+ PCI_DEVID(info->bus, info->devfn), + info->pfsid, dev_pasid->pasid, + info->ats_qdep, addr, + mask); + } + spin_unlock_irqrestore(&domain->lock, flags); +} + +/* + * The VT-d spec requires to use PASID-based-IOTLB Invalidation to + * invalidate IOTLB and the paging-structure-caches for a first-stage + * page table. + */ +static void domain_flush_pasid_iotlb(struct intel_iommu *iommu, + struct dmar_domain *domain, u64 addr, + unsigned long npages, bool ih) +{ + u16 did = domain_id_iommu(domain, iommu); + struct device_pasid_info *dev_pasid; + unsigned long flags; + + spin_lock_irqsave(&domain->lock, flags); + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) + qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih); + spin_unlock_irqrestore(&domain->lock, flags); } @@ -1514,7 +1559,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, ih = 1 << 6; if (domain->use_first_level) { - qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih); + domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih); } else { unsigned long bitmask = aligned_pages - 1; @@ -1584,7 +1629,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) u16 did = domain_id_iommu(dmar_domain, iommu); if (dmar_domain->use_first_level) - qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0); + domain_flush_pasid_iotlb(iommu, dmar_domain, 0, -1, 0); else iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); @@ -1756,6 +1801,7 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->use_first_level = true; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); + INIT_LIST_HEAD(&domain->dev_pasids); spin_lock_init(&domain->lock); xa_init(&domain->iommu_array); @@ -2429,10 +2475,11 @@ static int __init si_domain_init(int hw) return 0; } -static int dmar_domain_attach_device(struct dmar_domain *domain, - struct device *dev) +static int dmar_domain_attach_device_pasid(struct dmar_domain *domain, + struct device *dev, 
ioasid_t pasid) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct device_pasid_info *dev_pasid; struct intel_iommu *iommu; unsigned long flags; u8 bus, devfn; @@ -2442,43 +2489,57 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (!iommu) return -ENODEV; + dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); + if (!dev_pasid) + return -ENOMEM; + ret = domain_attach_iommu(domain, iommu); if (ret) - return ret; + goto exit_free; + info->domain = domain; + dev_pasid->pasid = pasid; + dev_pasid->dev = dev; spin_lock_irqsave(&domain->lock, flags); - list_add(&info->link, &domain->devices); + if (!info->dev_attached) + list_add(&info->link, &domain->devices); + + list_add(&dev_pasid->link_domain, &domain->dev_pasids); spin_unlock_irqrestore(&domain->lock, flags); /* PASID table is mandatory for a PCI device in scalable mode. */ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { /* Setup the PASID entry for requests without PASID: */ if (hw_pass_through && domain_type_is_si(domain)) - ret = intel_pasid_setup_pass_through(iommu, domain, - dev, PASID_RID2PASID); + ret = intel_pasid_setup_pass_through(iommu, domain, dev, pasid); else if (domain->use_first_level) - ret = domain_setup_first_level(iommu, domain, dev, - PASID_RID2PASID); + ret = domain_setup_first_level(iommu, domain, dev, pasid); else - ret = intel_pasid_setup_second_level(iommu, domain, - dev, PASID_RID2PASID); + ret = intel_pasid_setup_second_level(iommu, domain, dev, pasid); if (ret) { - dev_err(dev, "Setup RID2PASID failed\n"); + dev_err(dev, "Setup PASID %d failed\n", pasid); device_block_translation(dev); - return ret; + goto exit_free; } } + /* device context already activated, we are done */ + if (info->dev_attached) + goto exit; ret = domain_context_mapping(domain, dev); if (ret) { dev_err(dev, "Domain context map failed\n"); device_block_translation(dev); - return ret; + goto exit_free; } iommu_enable_pci_caps(info); - + info->dev_attached = 1; 
+exit: return 0; +exit_free: + kfree(dev_pasid); + return ret; } static bool device_has_rmrr(struct device *dev) @@ -4029,8 +4090,7 @@ static void device_block_translation(struct device *dev) iommu_disable_pci_caps(info); if (!dev_is_real_dma_subdevice(dev)) { if (sm_supported(iommu)) - intel_pasid_tear_down_entry(iommu, dev, - PASID_RID2PASID, false); + intel_iommu_detach_device_pasid(&info->domain->domain, dev, PASID_RID2PASID); else domain_context_clear(info); } @@ -4040,6 +4100,7 @@ static void device_block_translation(struct device *dev) spin_lock_irqsave(&info->domain->lock, flags); list_del(&info->link); + info->dev_attached = 0; spin_unlock_irqrestore(&info->domain->lock, flags); domain_detach_iommu(info->domain, iommu); @@ -4186,7 +4247,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, if (ret) return ret; - return dmar_domain_attach_device(to_dmar_domain(domain), dev); + return dmar_domain_attach_device_pasid(to_dmar_domain(domain), dev, PASID_RID2PASID); } static int intel_iommu_map(struct iommu_domain *domain, @@ -4675,26 +4736,52 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, __mapping_notify_one(info->iommu, dmar_domain, pfn, pages); } -static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) +static void intel_iommu_detach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) { - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - struct iommu_domain *domain; + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct device_pasid_info *i, *dev_pasid = NULL; + struct intel_iommu *iommu = info->iommu; + unsigned long flags; - /* Domain type specific cleanup: */ - domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); - if (domain) { - switch (domain->type) { - case IOMMU_DOMAIN_SVA: - intel_svm_remove_dev_pasid(dev, pasid); - break; - default: - /* should never reach here */ - 
WARN_ON(1); + spin_lock_irqsave(&dmar_domain->lock, flags); + list_for_each_entry(i, &dmar_domain->dev_pasids, link_domain) { + if (i->dev == dev && i->pasid == pasid) { + list_del(&i->link_domain); + dev_pasid = i; break; } } + spin_unlock_irqrestore(&dmar_domain->lock, flags); + if (WARN_ON(!dev_pasid)) + return; + + /* PASID entry already cleared during SVA unbind */ + if (domain->type != IOMMU_DOMAIN_SVA) + intel_pasid_tear_down_entry(iommu, dev, pasid, false); + + kfree(dev_pasid); +} + +static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct dmar_domain *dmar_domain; + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); + dmar_domain = to_dmar_domain(domain); + + /* + * SVA domain type specific cleanup: not ideal, but until we have + * IOPF-capable domain-specific ops, we need this special case. + */ + if (domain->type == IOMMU_DOMAIN_SVA) + return intel_svm_remove_dev_pasid(dev, pasid); - intel_pasid_tear_down_entry(iommu, dev, pasid, false); + intel_iommu_detach_device_pasid(domain, dev, pasid); + domain_detach_iommu(dmar_domain, info->iommu); } const struct iommu_ops intel_iommu_ops = { diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 65b15be72878..b6c26f25d1ba 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -595,6 +595,7 @@ struct dmar_domain { spinlock_t lock; /* Protect device tracking lists */ struct list_head devices; /* all devices' list */ + struct list_head dev_pasids; /* all attached pasids */ struct dma_pte *pgd; /* virtual address */ int gaw; /* max guest address width */ @@ -708,6 +709,7 @@ struct device_domain_info { u8 ats_supported:1; u8 ats_enabled:1; u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */ + u8 dev_attached:1; /* Device context activated */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct
intel_iommu *iommu; /* IOMMU used by this device */ @@ -715,6 +717,12 @@ struct device_domain_info { struct pasid_table *pasid_table; /* pasid table */ }; +struct device_pasid_info { + struct list_head link_domain; /* link to domain siblings */ + struct device *dev; /* physical device derived from */ + ioasid_t pasid; /* PASID on physical device */ +}; + static inline void __iommu_flush_cache( struct intel_iommu *iommu, void *addr, int size) {