diff mbox series

[v5,6/7] iommu/vt-d: Implement set_dev_pasid domain op

Message ID 20230427174937.471668-7-jacob.jun.pan@linux.intel.com (mailing list archive)
State Superseded
Headers show
Series Re-enable IDXD kernel workqueue under DMA API | expand

Commit Message

Jacob Pan April 27, 2023, 5:49 p.m. UTC
Devices that use ENQCMDS to submit work on buffers mapped by DMA API
must attach a PASID to the default domain of the device. In preparation
for this use case, this patch implements set_dev_pasid() for the
default_domain_ops.

If the device context has not been set up prior to this call, this will
set up the device context in addition to PASID attachment.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
---
 drivers/iommu/intel/iommu.c | 92 ++++++++++++++++++++++++++++++-------
 1 file changed, 76 insertions(+), 16 deletions(-)

Comments

Baolu Lu May 3, 2023, 7:26 a.m. UTC | #1
On 4/28/23 1:49 AM, Jacob Pan wrote:
> Devices that use ENQCMDS to submit work on buffers mapped by DMA API
> must attach a PASID to the default domain of the device. In preparation
> for this use case, this patch implements set_dev_pasid() for the
> default_domain_ops.
> 
> If the device context has not been set up prior to this call, this will
> set up the device context in addition to PASID attachment.
> 
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> ---
>   drivers/iommu/intel/iommu.c | 92 ++++++++++++++++++++++++++++++-------
>   1 file changed, 76 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index 388453a7415e..f9d6c31cdc8e 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -278,6 +278,8 @@ static LIST_HEAD(dmar_satc_units);
>   	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
>   
>   static void device_block_translation(struct device *dev);
> +static void intel_iommu_detach_device_pasid(struct iommu_domain *domain,
> +					    struct device *dev, ioasid_t pasid);
>   static void intel_iommu_domain_free(struct iommu_domain *domain);
>   
>   int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
> @@ -4091,8 +4093,7 @@ static void device_block_translation(struct device *dev)
>   	iommu_disable_pci_caps(info);
>   	if (!dev_is_real_dma_subdevice(dev)) {
>   		if (sm_supported(iommu))
> -			intel_pasid_tear_down_entry(iommu, dev,
> -						    IOMMU_DEF_RID_PASID, false);
> +			intel_iommu_detach_device_pasid(&info->domain->domain, dev, IOMMU_DEF_RID_PASID);

device_block_translation() is called when switch RID's domain or release
the device. I assume that we don't need to touch this path when we add
the attach_dev_pasid support.

Blocking DMA translation through RID/PASID should be done in
remove_dev_pasid path.

Or, I overlooked anything?

[...]

>   
> +static int intel_iommu_attach_device_pasid(struct iommu_domain *domain,
> +					   struct device *dev, ioasid_t pasid)
> +{
> +	struct device_domain_info *info = dev_iommu_priv_get(dev);
> +	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> +	struct intel_iommu *iommu = info->iommu;
> +	int ret;
> +
> +	if (!pasid_supported(iommu))
> +		return -ENODEV;
> +
> +	ret = prepare_domain_attach_device(domain, dev);
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * Most likely the device context has already been set up, will only
> +	 * take a domain ID reference. Otherwise, device context will be set
> +	 * up here.

The "otherwise" case is only default domain deferred attaching case,
right?

When the device driver starts to call attach_dev_pasid api, it means
that the bus and device DMA configuration have been done. We could do
the deferred default domain attaching now. So, perhaps we should add
below code in the core:

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f1dcfa3f1a1b..633b5ca53606 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3296,6 +3296,12 @@ int iommu_attach_device_pasid(struct iommu_domain 
*domain,
         if (!group)
                 return -ENODEV;

+       ret = iommu_deferred_attach(dev, group->default_domain);
+       if (ret) {
+               iommu_group_put(group);
+               return ret;
+       }
+
         mutex_lock(&group->mutex);
         curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, 
GFP_KERNEL);
         if (curr) {

Perhaps need to call iommu_deferred_attach() inside the group->mutex
critical region?

> +	 * The upper layer APIs make no assumption about the ordering between
> +	 * device attachment and the PASID attachment.
> +	 */
> +	ret = dmar_domain_attach_device(to_dmar_domain(domain), dev);

Calling attach_device on the attach_dev_pasid path is not right.

> +	if (ret) {
> +		dev_err(dev, "Attach device failed\n");
> +		return ret;
> +	}
> +	return dmar_domain_attach_device_pasid(dmar_domain, iommu, dev, pasid);
> +}
> +
> +
> +
>   const struct iommu_ops intel_iommu_ops = {
>   	.capable		= intel_iommu_capable,
>   	.domain_alloc		= intel_iommu_domain_alloc,
> @@ -4802,6 +4861,7 @@ const struct iommu_ops intel_iommu_ops = {
>   #endif
>   	.default_domain_ops = &(const struct iommu_domain_ops) {
>   		.attach_dev		= intel_iommu_attach_device,
> +		.set_dev_pasid          = intel_iommu_attach_device_pasid,
>   		.map_pages		= intel_iommu_map_pages,
>   		.unmap_pages		= intel_iommu_unmap_pages,
>   		.iotlb_sync_map		= intel_iommu_iotlb_sync_map,

Best regards,
baolu
Jacob Pan May 4, 2023, 11:03 p.m. UTC | #2
Hi Baolu,

On Wed, 3 May 2023 15:26:00 +0800, Baolu Lu <baolu.lu@linux.intel.com>
wrote:

> On 4/28/23 1:49 AM, Jacob Pan wrote:
> > Devices that use ENQCMDS to submit work on buffers mapped by DMA API
> > must attach a PASID to the default domain of the device. In preparation
> > for this use case, this patch implements set_dev_pasid() for the
> > default_domain_ops.
> > 
> > If the device context has not been set up prior to this call, this will
> > set up the device context in addition to PASID attachment.
> > 
> > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > ---
> >   drivers/iommu/intel/iommu.c | 92 ++++++++++++++++++++++++++++++-------
> >   1 file changed, 76 insertions(+), 16 deletions(-)
> > 
> > diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> > index 388453a7415e..f9d6c31cdc8e 100644
> > --- a/drivers/iommu/intel/iommu.c
> > +++ b/drivers/iommu/intel/iommu.c
> > @@ -278,6 +278,8 @@ static LIST_HEAD(dmar_satc_units);
> >   	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
> >   
> >   static void device_block_translation(struct device *dev);
> > +static void intel_iommu_detach_device_pasid(struct iommu_domain
> > *domain,
> > +					    struct device *dev,
> > ioasid_t pasid); static void intel_iommu_domain_free(struct
> > iommu_domain *domain); 
> >   int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
> > @@ -4091,8 +4093,7 @@ static void device_block_translation(struct
> > device *dev) iommu_disable_pci_caps(info);
> >   	if (!dev_is_real_dma_subdevice(dev)) {
> >   		if (sm_supported(iommu))
> > -			intel_pasid_tear_down_entry(iommu, dev,
> > -
> > IOMMU_DEF_RID_PASID, false);
> > +
> > intel_iommu_detach_device_pasid(&info->domain->domain, dev,
> > IOMMU_DEF_RID_PASID);  
> 
> device_block_translation() is called when switch RID's domain or release
> the device. I assume that we don't need to touch this path when we add
> the attach_dev_pasid support.
> 
> Blocking DMA translation through RID/PASID should be done in
> remove_dev_pasid path.
> 
> Or, I overlooked anything?
> 
> [...]
> 
> >   
> > +static int intel_iommu_attach_device_pasid(struct iommu_domain *domain,
> > +					   struct device *dev,
> > ioasid_t pasid) +{
> > +	struct device_domain_info *info = dev_iommu_priv_get(dev);
> > +	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> > +	struct intel_iommu *iommu = info->iommu;
> > +	int ret;
> > +
> > +	if (!pasid_supported(iommu))
> > +		return -ENODEV;
> > +
> > +	ret = prepare_domain_attach_device(domain, dev);
> > +	if (ret)
> > +		return ret;
> > +
> > +	/*
> > +	 * Most likely the device context has already been set up,
> > will only
> > +	 * take a domain ID reference. Otherwise, device context will
> > be set
> > +	 * up here.  
> 
> The "otherwise" case is only default domain deferred attaching case,
> right?
it might be the only case so far, but my intention is to be general. i.e.
no ordering requirements. I believe it is more future proof in case
device_attach_pasid called before device_attach.

> When the device driver starts to call attach_dev_pasid api, it means
> that the bus and device DMA configuration have been done. We could do
> the deferred default domain attaching now. So, perhaps we should add
> below code in the core:
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index f1dcfa3f1a1b..633b5ca53606 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -3296,6 +3296,12 @@ int iommu_attach_device_pasid(struct iommu_domain 
> *domain,
>          if (!group)
>                  return -ENODEV;
> 
> +       ret = iommu_deferred_attach(dev, group->default_domain);
> +       if (ret) {
> +               iommu_group_put(group);
> +               return ret;
> +       }
it will cover the device_attach, but adding a special case.

>          mutex_lock(&group->mutex);
>          curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, 
> GFP_KERNEL);
>          if (curr) {
> 
> Perhaps need to call iommu_deferred_attach() inside the group->mutex
> critical region?
i agree, attach RID_PASID should also be on the group's pasid_array.

> > +	 * The upper layer APIs make no assumption about the ordering
> > between
> > +	 * device attachment and the PASID attachment.
> > +	 */
> > +	ret = dmar_domain_attach_device(to_dmar_domain(domain), dev);  
> 
> Calling attach_device on the attach_dev_pasid path is not right.
I think it comes down to the philosophical differences in terms of
who is responsible for ensuring device ctx is set up  prior to device pasid
attach:
1. vt-d driver
2. upper layer API

> > +	if (ret) {
> > +		dev_err(dev, "Attach device failed\n");
> > +		return ret;
> > +	}
> > +	return dmar_domain_attach_device_pasid(dmar_domain, iommu,
> > dev, pasid); +}
> > +
> > +
> > +
> >   const struct iommu_ops intel_iommu_ops = {
> >   	.capable		= intel_iommu_capable,
> >   	.domain_alloc		= intel_iommu_domain_alloc,
> > @@ -4802,6 +4861,7 @@ const struct iommu_ops intel_iommu_ops = {
> >   #endif
> >   	.default_domain_ops = &(const struct iommu_domain_ops) {
> >   		.attach_dev		=
> > intel_iommu_attach_device,
> > +		.set_dev_pasid          =
> > intel_iommu_attach_device_pasid, .map_pages		=
> > intel_iommu_map_pages, .unmap_pages		=
> > intel_iommu_unmap_pages, .iotlb_sync_map		=
> > intel_iommu_iotlb_sync_map,  
> 
> Best regards,
> baolu


Thanks,

Jacob
Baolu Lu May 5, 2023, 2:58 a.m. UTC | #3
On 5/5/23 7:03 AM, Jacob Pan wrote:
>>> +static int intel_iommu_attach_device_pasid(struct iommu_domain *domain,
>>> +					   struct device *dev,
>>> ioasid_t pasid) +{
>>> +	struct device_domain_info *info = dev_iommu_priv_get(dev);
>>> +	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
>>> +	struct intel_iommu *iommu = info->iommu;
>>> +	int ret;
>>> +
>>> +	if (!pasid_supported(iommu))
>>> +		return -ENODEV;
>>> +
>>> +	ret = prepare_domain_attach_device(domain, dev);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	/*
>>> +	 * Most likely the device context has already been set up,
>>> will only
>>> +	 * take a domain ID reference. Otherwise, device context will
>>> be set
>>> +	 * up here.
>> The "otherwise" case is only default domain deferred attaching case,
>> right?
> it might be the only case so far, but my intention is to be general. i.e.
> no ordering requirements. I believe it is more future proof in case
> device_attach_pasid called before device_attach.

Let's put aside deferred attach and talk about it later.

The device's context entry is configured when the default domain is
being attached to the device. And, the default domain attaching is in
the iommu probe_device path. It always happens before set_device_pasid
which is designed to be called by the device driver. So the real
situation is that when the device driver calls set_device_pasid, the
context entry should already have been configured.

Then let's pick up the deferred attach case. It is a workaround for
kdump (Documentation/admin-guide/kdump/kdump.rst). I don't think PASID
feature is functionally required by any kdump capture kernel as its
main purpose is to dump the memory of a panic kernel.

In summary, it seems to be reasonable for the vt-d driver to return
-EBUSY when a device's context was copied. The idxd driver should
continue to work without kernel DMA with PASID support.

	if (context_copied(iommu, bus, devfn))
		return -EBUSY;

Make things general is always good, but this doesn't mean that we need
to make the code complex to support cases that do not exist or are not
used. Thoughts?

Best regards,
baolu
Jacob Pan May 5, 2023, 8:32 p.m. UTC | #4
Hi Baolu,

On Fri, 5 May 2023 10:58:38 +0800, Baolu Lu <baolu.lu@linux.intel.com>
wrote:

> On 5/5/23 7:03 AM, Jacob Pan wrote:
> >>> +static int intel_iommu_attach_device_pasid(struct iommu_domain
> >>> *domain,
> >>> +					   struct device *dev,
> >>> ioasid_t pasid) +{
> >>> +	struct device_domain_info *info = dev_iommu_priv_get(dev);
> >>> +	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> >>> +	struct intel_iommu *iommu = info->iommu;
> >>> +	int ret;
> >>> +
> >>> +	if (!pasid_supported(iommu))
> >>> +		return -ENODEV;
> >>> +
> >>> +	ret = prepare_domain_attach_device(domain, dev);
> >>> +	if (ret)
> >>> +		return ret;
> >>> +
> >>> +	/*
> >>> +	 * Most likely the device context has already been set up,
> >>> will only
> >>> +	 * take a domain ID reference. Otherwise, device context will
> >>> be set
> >>> +	 * up here.  
> >> The "otherwise" case is only default domain deferred attaching case,
> >> right?  
> > it might be the only case so far, but my intention is to be general.
> > i.e. no ordering requirements. I believe it is more future proof in case
> > device_attach_pasid called before device_attach.  
> 
> Let's put aside deferred attach and talk about it later.
> 
> The device's context entry is configured when the default domain is
> being attached to the device. And, the default domain attaching is in
> the iommu probe_device path. It always happens before set_device_pasid
> which is designed to be called by the device driver. So the real
> situation is that when the device driver calls set_device_pasid, the
> context entry should already have been configured.
> 
> Then let's pick up the deferred attach case. It is a workaround for
> kdump (Documentation/admin-guide/kdump/kdump.rst). I don't think PASID
> feature is functionally required by any kdump capture kernel as its
> main purpose is to dump the memory of a panic kernel.
> 
> In summary, it seems to be reasonable for the vt-d driver to return
> -EBUSY when a device's context was copied. The idxd driver should
> continue to work without kernel DMA with PASID support.
> 
> 	if (context_copied(iommu, bus, devfn))
> 		return -EBUSY;
> 
> Make things general is always good, but this doesn't mean that we need
> to make the code complex to support cases that do not exist or are not
> used. Thoughts?
> 
Good point, it is better not put dead code in. Let me also document this
behavior for future change that may affect the ordering requirement.

Thanks,

Jacob
diff mbox series

Patch

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 388453a7415e..f9d6c31cdc8e 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -278,6 +278,8 @@  static LIST_HEAD(dmar_satc_units);
 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
 
 static void device_block_translation(struct device *dev);
+static void intel_iommu_detach_device_pasid(struct iommu_domain *domain,
+					    struct device *dev, ioasid_t pasid);
 static void intel_iommu_domain_free(struct iommu_domain *domain);
 
 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
@@ -4091,8 +4093,7 @@  static void device_block_translation(struct device *dev)
 	iommu_disable_pci_caps(info);
 	if (!dev_is_real_dma_subdevice(dev)) {
 		if (sm_supported(iommu))
-			intel_pasid_tear_down_entry(iommu, dev,
-						    IOMMU_DEF_RID_PASID, false);
+			intel_iommu_detach_device_pasid(&info->domain->domain, dev, IOMMU_DEF_RID_PASID);
 		else
 			domain_context_clear(info);
 	}
@@ -4761,28 +4762,86 @@  static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
 		__mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
 }
 
-static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
+static void intel_iommu_detach_device_pasid(struct iommu_domain *domain,
+					    struct device *dev, ioasid_t pasid)
 {
-	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
-	struct iommu_domain *domain;
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	struct device_pasid_info *i, *dev_pasid = NULL;
+	struct intel_iommu *iommu = info->iommu;
+	unsigned long flags;
 
-	/* Domain type specific cleanup: */
-	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
-	if (domain) {
-		switch (domain->type) {
-		case IOMMU_DOMAIN_SVA:
-			intel_svm_remove_dev_pasid(dev, pasid);
-			break;
-		default:
-			/* should never reach here */
-			WARN_ON(1);
+	spin_lock_irqsave(&dmar_domain->lock, flags);
+	list_for_each_entry(i, &dmar_domain->dev_pasids, link_domain) {
+		if (i->dev == dev && i->pasid == pasid) {
+			list_del(&i->link_domain);
+			dev_pasid = i;
 			break;
 		}
 	}
+	spin_unlock_irqrestore(&dmar_domain->lock, flags);
+	if (WARN_ON(!dev_pasid))
+		return;
+
+	/* PASID entry already cleared during SVA unbind */
+	if (domain->type != IOMMU_DOMAIN_SVA)
+		intel_pasid_tear_down_entry(iommu, dev, pasid, false);
+
+	kfree(dev_pasid);
+}
+
+static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct dmar_domain *dmar_domain;
+	struct iommu_domain *domain;
+
+	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
+	dmar_domain = to_dmar_domain(domain);
+
+	/*
+	 * SVA Domain type specific cleanup: Not ideal but not until we have
+	 * IOPF capable domain specific ops, we need this special case.
+	 */
+	if (domain->type == IOMMU_DOMAIN_SVA)
+		return intel_svm_remove_dev_pasid(dev, pasid);
 
-	intel_pasid_tear_down_entry(iommu, dev, pasid, false);
+	intel_iommu_detach_device_pasid(domain, dev, pasid);
+	domain_detach_iommu(dmar_domain, info->iommu);
 }
 
+static int intel_iommu_attach_device_pasid(struct iommu_domain *domain,
+					   struct device *dev, ioasid_t pasid)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	struct intel_iommu *iommu = info->iommu;
+	int ret;
+
+	if (!pasid_supported(iommu))
+		return -ENODEV;
+
+	ret = prepare_domain_attach_device(domain, dev);
+	if (ret)
+		return ret;
+
+	/*
+	 * Most likely the device context has already been set up, will only
+	 * take a domain ID reference. Otherwise, device context will be set
+	 * up here.
+	 * The upper layer APIs make no assumption about the ordering between
+	 * device attachment and the PASID attachment.
+	 */
+	ret = dmar_domain_attach_device(to_dmar_domain(domain), dev);
+	if (ret) {
+		dev_err(dev, "Attach device failed\n");
+		return ret;
+	}
+	return dmar_domain_attach_device_pasid(dmar_domain, iommu, dev, pasid);
+}
+
+
+
 const struct iommu_ops intel_iommu_ops = {
 	.capable		= intel_iommu_capable,
 	.domain_alloc		= intel_iommu_domain_alloc,
@@ -4802,6 +4861,7 @@  const struct iommu_ops intel_iommu_ops = {
 #endif
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev		= intel_iommu_attach_device,
+		.set_dev_pasid          = intel_iommu_attach_device_pasid,
 		.map_pages		= intel_iommu_map_pages,
 		.unmap_pages		= intel_iommu_unmap_pages,
 		.iotlb_sync_map		= intel_iommu_iotlb_sync_map,