diff mbox series

[v2,5/8] iommu/arm-smmu-v3: Report IOMMU_CAP_ENFORCE_CACHE_COHERENCY for CANWBS

Message ID 5-v2-621370057090+91fec-smmuv3_nesting_jgg@nvidia.com (mailing list archive)
State Handled Elsewhere, archived
Headers show
Series Initial support for SMMUv3 nested translation | expand

Commit Message

Jason Gunthorpe Aug. 27, 2024, 3:51 p.m. UTC
HW with CANWBS is always cache coherent and ignores PCI No Snoop requests
as well. This meets the requirement for IOMMU_CAP_ENFORCE_CACHE_COHERENCY,
so let's return it.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 35 +++++++++++++++++++++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  1 +
 2 files changed, 36 insertions(+)

Comments

Nicolin Chen Aug. 27, 2024, 8:12 p.m. UTC | #1
On Tue, Aug 27, 2024 at 12:51:35PM -0300, Jason Gunthorpe wrote:
> HW with CANWBS is always cache coherent and ignores PCI No Snoop requests
> as well. This meets the requirement for IOMMU_CAP_ENFORCE_CACHE_COHERENCY,
> so let's return it.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>

With two very ignorable nits:

> @@ -2693,6 +2718,15 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
>  		 * one of them.
>  		 */
>  		spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> +		if (smmu_domain->enforce_cache_coherency &&
> +		    !(dev_iommu_fwspec_get(master->dev)->flags &
> +		      IOMMU_FWSPEC_PCI_RC_CANWBS)) {

How about a small dev_enforce_cache_coherency() helper?

> +			kfree(master_domain);
> +			spin_unlock_irqrestore(&smmu_domain->devices_lock,
> +					       flags);
> +			return -EINVAL;

kfree() doesn't need to be locked.

Thanks
Nicolin
Jason Gunthorpe Aug. 28, 2024, 7:12 p.m. UTC | #2
On Tue, Aug 27, 2024 at 01:12:27PM -0700, Nicolin Chen wrote:
> > @@ -2693,6 +2718,15 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
> >  		 * one of them.
> >  		 */
> >  		spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> > +		if (smmu_domain->enforce_cache_coherency &&
> > +		    !(dev_iommu_fwspec_get(master->dev)->flags &
> > +		      IOMMU_FWSPEC_PCI_RC_CANWBS)) {
> 
> How about a small dev_enforce_cache_coherency() helper?

I added a

+static bool arm_smmu_master_canwbs(struct arm_smmu_master *master)
+{
+       return dev_iommu_fwspec_get(master->dev)->flags &
+              IOMMU_FWSPEC_PCI_RC_CANWBS;
+}
+

> > +			kfree(master_domain);
> > +			spin_unlock_irqrestore(&smmu_domain->devices_lock,
> > +					       flags);
> > +			return -EINVAL;
> 
> kfree() doesn't need to be locked.

Yep

Thanks,
Jason
Mostafa Saleh Aug. 30, 2024, 3:19 p.m. UTC | #3
Hi Jason,

On Tue, Aug 27, 2024 at 12:51:35PM -0300, Jason Gunthorpe wrote:
> HW with CANWBS is always cache coherent and ignores PCI No Snoop requests
> as well. This meets the requirement for IOMMU_CAP_ENFORCE_CACHE_COHERENCY,
> so let's return it.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: Mostafa Saleh <smostafa@google.com>

> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 35 +++++++++++++++++++++
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  1 +
>  2 files changed, 36 insertions(+)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index e2b97ad6d74b03..c2021e821e5cb6 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -2253,6 +2253,9 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
>  	case IOMMU_CAP_CACHE_COHERENCY:
>  		/* Assume that a coherent TCU implies coherent TBUs */
>  		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
> +	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
> +		return dev_iommu_fwspec_get(dev)->flags &
> +		       IOMMU_FWSPEC_PCI_RC_CANWBS;
>  	case IOMMU_CAP_NOEXEC:
>  	case IOMMU_CAP_DEFERRED_FLUSH:
>  		return true;
> @@ -2263,6 +2266,28 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
>  	}
>  }
>  
> +static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
> +{
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +	struct arm_smmu_master_domain *master_domain;
> +	unsigned long flags;
> +	bool ret = false;
nit: we can avoid the goto, if we inverse the logic of ret (and set it
to false if device doesn't support CANWBS)

> +	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> +	list_for_each_entry(master_domain, &smmu_domain->devices,
> +			    devices_elm) {
> +		if (!(dev_iommu_fwspec_get(master_domain->master->dev)->flags &
> +		      IOMMU_FWSPEC_PCI_RC_CANWBS))
> +			goto out;
> +	}
> +
> +	smmu_domain->enforce_cache_coherency = true;
> +	ret = true;
> +out:
> +	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> +	return ret;
> +}
> +
>  struct arm_smmu_domain *arm_smmu_domain_alloc(void)
>  {
>  	struct arm_smmu_domain *smmu_domain;
> @@ -2693,6 +2718,15 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
>  		 * one of them.
>  		 */
>  		spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> +		if (smmu_domain->enforce_cache_coherency &&
> +		    !(dev_iommu_fwspec_get(master->dev)->flags &
> +		      IOMMU_FWSPEC_PCI_RC_CANWBS)) {
> +			kfree(master_domain);
> +			spin_unlock_irqrestore(&smmu_domain->devices_lock,
> +					       flags);
> +			return -EINVAL;
> +		}
> +
>  		if (state->ats_enabled)
>  			atomic_inc(&smmu_domain->nr_ats_masters);
>  		list_add(&master_domain->devices_elm, &smmu_domain->devices);
> @@ -3450,6 +3484,7 @@ static struct iommu_ops arm_smmu_ops = {
>  	.owner			= THIS_MODULE,
>  	.default_domain_ops = &(const struct iommu_domain_ops) {
>  		.attach_dev		= arm_smmu_attach_dev,
> +		.enforce_cache_coherency = arm_smmu_enforce_cache_coherency,
>  		.set_dev_pasid		= arm_smmu_s1_set_dev_pasid,
>  		.map_pages		= arm_smmu_map_pages,
>  		.unmap_pages		= arm_smmu_unmap_pages,
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 7e8d2f36faebf3..45882f65bfcad0 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -787,6 +787,7 @@ struct arm_smmu_domain {
>  	/* List of struct arm_smmu_master_domain */
>  	struct list_head		devices;
>  	spinlock_t			devices_lock;
> +	bool				enforce_cache_coherency : 1;
>  
>  	struct mmu_notifier		mmu_notifier;
>  };
> -- 
> 2.46.0
>
Jason Gunthorpe Aug. 30, 2024, 5:10 p.m. UTC | #4
On Fri, Aug 30, 2024 at 03:19:16PM +0000, Mostafa Saleh wrote:
> > @@ -2263,6 +2266,28 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
> >  	}
> >  }
> >  
> > +static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
> > +{
> > +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> > +	struct arm_smmu_master_domain *master_domain;
> > +	unsigned long flags;
> > +	bool ret = false;
> nit: we can avoid the goto, if we inverse the logic of ret (and set it
> to false if device doesn't support CANWBS)

Yeah, that is tidier:

static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_master_domain *master_domain;
	unsigned long flags;
	bool ret = true;

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master_domain, &smmu_domain->devices,
			    devices_elm) {
		if (!arm_smmu_master_canwbs(master_domain->master)) {
			ret = false;
			break;
		}
	}
	smmu_domain->enforce_cache_coherency = ret;
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
	return ret;
}

Thanks,
Jason
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e2b97ad6d74b03..c2021e821e5cb6 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2253,6 +2253,9 @@  static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
 	case IOMMU_CAP_CACHE_COHERENCY:
 		/* Assume that a coherent TCU implies coherent TBUs */
 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
+	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
+		return dev_iommu_fwspec_get(dev)->flags &
+		       IOMMU_FWSPEC_PCI_RC_CANWBS;
 	case IOMMU_CAP_NOEXEC:
 	case IOMMU_CAP_DEFERRED_FLUSH:
 		return true;
@@ -2263,6 +2266,28 @@  static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
 	}
 }
 
+static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	struct arm_smmu_master_domain *master_domain;
+	unsigned long flags;
+	bool ret = false;
+
+	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+	list_for_each_entry(master_domain, &smmu_domain->devices,
+			    devices_elm) {
+		if (!(dev_iommu_fwspec_get(master_domain->master->dev)->flags &
+		      IOMMU_FWSPEC_PCI_RC_CANWBS))
+			goto out;
+	}
+
+	smmu_domain->enforce_cache_coherency = true;
+	ret = true;
+out:
+	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+	return ret;
+}
+
 struct arm_smmu_domain *arm_smmu_domain_alloc(void)
 {
 	struct arm_smmu_domain *smmu_domain;
@@ -2693,6 +2718,15 @@  static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
 		 * one of them.
 		 */
 		spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+		if (smmu_domain->enforce_cache_coherency &&
+		    !(dev_iommu_fwspec_get(master->dev)->flags &
+		      IOMMU_FWSPEC_PCI_RC_CANWBS)) {
+			kfree(master_domain);
+			spin_unlock_irqrestore(&smmu_domain->devices_lock,
+					       flags);
+			return -EINVAL;
+		}
+
 		if (state->ats_enabled)
 			atomic_inc(&smmu_domain->nr_ats_masters);
 		list_add(&master_domain->devices_elm, &smmu_domain->devices);
@@ -3450,6 +3484,7 @@  static struct iommu_ops arm_smmu_ops = {
 	.owner			= THIS_MODULE,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev		= arm_smmu_attach_dev,
+		.enforce_cache_coherency = arm_smmu_enforce_cache_coherency,
 		.set_dev_pasid		= arm_smmu_s1_set_dev_pasid,
 		.map_pages		= arm_smmu_map_pages,
 		.unmap_pages		= arm_smmu_unmap_pages,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 7e8d2f36faebf3..45882f65bfcad0 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -787,6 +787,7 @@  struct arm_smmu_domain {
 	/* List of struct arm_smmu_master_domain */
 	struct list_head		devices;
 	spinlock_t			devices_lock;
+	bool				enforce_cache_coherency : 1;
 
 	struct mmu_notifier		mmu_notifier;
 };