diff mbox series

[v10,10/13] iommu/arm-smmu-v3: Check for SVA features

Message ID 20200918101852.582559-11-jean-philippe@linaro.org (mailing list archive)
State New, archived
Headers show
Series iommu: Shared Virtual Addressing for SMMUv3 (PT sharing part) | expand

Commit Message

Jean-Philippe Brucker Sept. 18, 2020, 10:18 a.m. UTC
Aggregate all sanity-checks for sharing CPU page tables with the SMMU
under a single ARM_SMMU_FEAT_SVA bit. For PCIe SVA, users also need to
check FEAT_ATS and FEAT_PRI. For platform SVA, they will have to check
FEAT_STALLS.

Introduce ARM_SMMU_FEAT_BTM (Broadcast TLB Maintenance), but don't
enable it at the moment. Since the entire VMID space is shared with the
CPU, enabling DVM (by clearing SMMU_CR2.PTM) could result in
over-invalidation and affect performance of stage-2 mappings.

Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
v10:
* Check that 52-bit VA is supported on the SMMU side if vabits_actual
  requires it.
* Check arm64_kernel_unmapped_at_el0() instead of
  CONFIG_UNMAP_KERNEL_AT_EL0
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   | 10 +++++
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 45 +++++++++++++++++++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  3 ++
 3 files changed, 58 insertions(+)

Comments

Shameerali Kolothum Thodi Sept. 21, 2020, 8:59 a.m. UTC | #1
Hi Jean,

> -----Original Message-----
> From: iommu [mailto:iommu-bounces@lists.linux-foundation.org] On Behalf Of
> Jean-Philippe Brucker
> Sent: 18 September 2020 11:19
> To: iommu@lists.linux-foundation.org; linux-arm-kernel@lists.infradead.org;
> linux-mm@kvack.org
> Cc: fenghua.yu@intel.com; Jean-Philippe Brucker <jean-philippe@linaro.org>;
> catalin.marinas@arm.com; Suzuki K Poulose <suzuki.poulose@arm.com>;
> robin.murphy@arm.com; zhangfei.gao@linaro.org; will@kernel.org
> Subject: [PATCH v10 10/13] iommu/arm-smmu-v3: Check for SVA features
> 
> Aggregate all sanity-checks for sharing CPU page tables with the SMMU
> under a single ARM_SMMU_FEAT_SVA bit. For PCIe SVA, users also need to
> check FEAT_ATS and FEAT_PRI. For platform SVA, they will have to check
> FEAT_STALLS.
> 
> Introduce ARM_SMMU_FEAT_BTM (Broadcast TLB Maintenance), but don't
> enable it at the moment. Since the entire VMID space is shared with the
> CPU, enabling DVM (by clearing SMMU_CR2.PTM) could result in
> over-invalidation and affect performance of stage-2 mappings.
> 
> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
> v10:
> * Check that 52-bit VA is supported on the SMMU side if vabits_actual
>   requires it.
> * Check arm64_kernel_unmapped_at_el0() instead of
>   CONFIG_UNMAP_KERNEL_AT_EL0
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   | 10 +++++
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 45
> +++++++++++++++++++
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  3 ++
>  3 files changed, 58 insertions(+)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 90c08f156b43..7b14b48a26c7 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -602,6 +602,8 @@ struct arm_smmu_device {
>  #define ARM_SMMU_FEAT_STALL_FORCE	(1 << 13)
>  #define ARM_SMMU_FEAT_VAX		(1 << 14)
>  #define ARM_SMMU_FEAT_RANGE_INV		(1 << 15)
> +#define ARM_SMMU_FEAT_BTM		(1 << 16)
> +#define ARM_SMMU_FEAT_SVA		(1 << 17)
>  	u32				features;
> 
>  #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
> @@ -683,4 +685,12 @@ int arm_smmu_write_ctx_desc(struct
> arm_smmu_domain *smmu_domain, int ssid,
>  void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
>  bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd);
> 
> +#ifdef CONFIG_ARM_SMMU_V3_SVA
> +bool arm_smmu_sva_supported(struct arm_smmu_device *smmu);
> +#else /* CONFIG_ARM_SMMU_V3_SVA */
> +static inline bool arm_smmu_sva_supported(struct arm_smmu_device
> *smmu)
> +{
> +	return false;
> +}
> +#endif /* CONFIG_ARM_SMMU_V3_SVA */
>  #endif /* _ARM_SMMU_V3_H */
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> index ef3fcfa72187..cb94c0924196 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> @@ -152,3 +152,48 @@ static void arm_smmu_free_shared_cd(struct
> arm_smmu_ctx_desc *cd)
>  		kfree(cd);
>  	}
>  }
> +
> +bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
> +{
> +	unsigned long reg, fld;
> +	unsigned long oas;
> +	unsigned long asid_bits;
> +	u32 feat_mask = ARM_SMMU_FEAT_BTM |
> ARM_SMMU_FEAT_COHERENCY;

Why is BTM mandated for SVA? I couldn't find this requirement in SMMU spec
(Sorry if I missed it or this got discussed earlier). But if performance is the only concern here,
is it better just to allow it with a warning rather than limiting SMMUs without BTM?

Thanks,
Shameer

> +
> +	if (vabits_actual == 52)
> +		feat_mask |= ARM_SMMU_FEAT_VAX;
> +
> +	if ((smmu->features & feat_mask) != feat_mask)
> +		return false;
> +
> +	if (!(smmu->pgsize_bitmap & PAGE_SIZE))
> +		return false;
> +
> +	/*
> +	 * Get the smallest PA size of all CPUs (sanitized by cpufeature). We're
> +	 * not even pretending to support AArch32 here. Abort if the MMU
> outputs
> +	 * addresses larger than what we support.
> +	 */
> +	reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
> +	fld = cpuid_feature_extract_unsigned_field(reg,
> ID_AA64MMFR0_PARANGE_SHIFT);
> +	oas = id_aa64mmfr0_parange_to_phys_shift(fld);
> +	if (smmu->oas < oas)
> +		return false;
> +
> +	/* We can support bigger ASIDs than the CPU, but not smaller */
> +	fld = cpuid_feature_extract_unsigned_field(reg,
> ID_AA64MMFR0_ASID_SHIFT);
> +	asid_bits = fld ? 16 : 8;
> +	if (smmu->asid_bits < asid_bits)
> +		return false;
> +
> +	/*
> +	 * See max_pinned_asids in arch/arm64/mm/context.c. The following is
> +	 * generally the maximum number of bindable processes.
> +	 */
> +	if (arm64_kernel_unmapped_at_el0())
> +		asid_bits--;
> +	dev_dbg(smmu->dev, "%d shared contexts\n", (1 << asid_bits) -
> +		num_possible_cpus() - 2);
> +
> +	return true;
> +}
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index e99ebdd4c841..44c57bcfe112 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -3257,6 +3257,9 @@ static int arm_smmu_device_hw_probe(struct
> arm_smmu_device *smmu)
> 
>  	smmu->ias = max(smmu->ias, smmu->oas);
> 
> +	if (arm_smmu_sva_supported(smmu))
> +		smmu->features |= ARM_SMMU_FEAT_SVA;
> +
>  	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
>  		 smmu->ias, smmu->oas, smmu->features);
>  	return 0;
> --
> 2.28.0
> 
> _______________________________________________
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
Jean-Philippe Brucker Sept. 24, 2020, 10:13 a.m. UTC | #2
Hi Shameer,

On Mon, Sep 21, 2020 at 08:59:39AM +0000, Shameerali Kolothum Thodi wrote:
> > +bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
> > +{
> > +	unsigned long reg, fld;
> > +	unsigned long oas;
> > +	unsigned long asid_bits;
> > +	u32 feat_mask = ARM_SMMU_FEAT_BTM |
> > ARM_SMMU_FEAT_COHERENCY;
> 
> Why is BTM mandated for SVA? I couldn't find this requirement in SMMU spec
> (Sorry if I missed it or this got discussed earlier). But if performance is the only concern here,
> is it better just to allow it with a warning rather than limiting SMMUs without BTM?

It's a performance concern and requires to support multiple
configurations, but the spec allows it. Are there SMMUs without BTM that
need it?

Thanks,
Jean
Shameerali Kolothum Thodi Sept. 24, 2020, 11:13 a.m. UTC | #3
> -----Original Message-----
> From: Jean-Philippe Brucker [mailto:jean-philippe@linaro.org]
> Sent: 24 September 2020 11:14
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
> Cc: iommu@lists.linux-foundation.org; linux-arm-kernel@lists.infradead.org;
> linux-mm@kvack.org; fenghua.yu@intel.com; catalin.marinas@arm.com;
> Suzuki K Poulose <suzuki.poulose@arm.com>; robin.murphy@arm.com;
> zhangfei.gao@linaro.org; will@kernel.org
> Subject: Re: [PATCH v10 10/13] iommu/arm-smmu-v3: Check for SVA features
> 
> Hi Shameer,
> 
> On Mon, Sep 21, 2020 at 08:59:39AM +0000, Shameerali Kolothum Thodi
> wrote:
> > > +bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
> > > +{
> > > +	unsigned long reg, fld;
> > > +	unsigned long oas;
> > > +	unsigned long asid_bits;
> > > +	u32 feat_mask = ARM_SMMU_FEAT_BTM |
> > > ARM_SMMU_FEAT_COHERENCY;
> >
> > Why is BTM mandated for SVA? I couldn't find this requirement in SMMU spec
> > (Sorry if I missed it or this got discussed earlier). But if performance is the
> only concern here,
> > is it better just to allow it with a warning rather than limiting SMMUs without
> BTM?
> 
> It's a performance concern and requires to support multiple
> configurations, but the spec allows it. Are there SMMUs without BTM that
> need it?

Ok. Thanks for clarifying. May be better to add a comment here. Our platforms
do support BTM, but I had a strange case where the UEFI didn't enable DVM
but SMMU reported BTM and was causing random failures due to lack of
explicit tlbi on mm invalidation. Anyway that doesn't count here :)

Thanks,
Shameer

> Thanks,
> Jean
Krishna Reddy Dec. 9, 2020, 7:49 p.m. UTC | #4
Hi Jean,
> > Why is BTM mandated for SVA? I couldn't find this requirement in 
> > SMMU spec (Sorry if I missed it or this got discussed earlier). But 
> > if performance is the
> only concern here,
> > is it better just to allow it with a warning rather than limiting 
> > SMMUs without
> BTM?
>
> It's a performance concern and requires to support multiple 
> configurations, but the spec allows it. Are there SMMUs without BTM 
> that need it?

The Tegra Next Generation SOC uses arm-smmu-v3, but it doesn't have support for BTM.
Do you have plan to get your earlier patch to handle invalidate notifications into upstream sometime soon? 
Can the dependency on BTM be relaxed with the patch?

PATCH v9 13/13] iommu/arm-smmu-v3: Hook up ATC invalidation to mm ops
https://www.spinics.net/lists/arm-kernel/msg825099.html

-KR
Will Deacon Dec. 9, 2020, 8:07 p.m. UTC | #5
On Wed, Dec 09, 2020 at 07:49:09PM +0000, Krishna Reddy wrote:
> > > Why is BTM mandated for SVA? I couldn't find this requirement in 
> > > SMMU spec (Sorry if I missed it or this got discussed earlier). But 
> > > if performance is the
> > only concern here,
> > > is it better just to allow it with a warning rather than limiting 
> > > SMMUs without
> > BTM?
> >
> > It's a performance concern and requires to support multiple 
> > configurations, but the spec allows it. Are there SMMUs without BTM 
> > that need it?
> 
> The Tegra Next Generation SOC uses arm-smmu-v3, but it doesn't have support for BTM.
> Do you have plan to get your earlier patch to handle invalidate
> notifications into upstream sometime soon? 

Is that a limitation of the SMMU implementation, the interconnect or the
integration?

Will
Krishna Reddy Dec. 9, 2020, 8:38 p.m. UTC | #6
> > The Tegra Next Generation SOC uses arm-smmu-v3, but it doesn't have support for BTM.
> > Do you have plan to get your earlier patch to handle invalidate 
> > notifications into upstream sometime soon?

>Is that a limitation of the SMMU implementation, the interconnect or the integration?

It is the limitation of interconnect. The DVM messages don't reach SMMU. 
The BTM bit in SMMU IDR does indicate that it doesn't support BTM. 

-KR
Jean-Philippe Brucker Dec. 14, 2020, 9:32 a.m. UTC | #7
On Wed, Dec 09, 2020 at 07:49:09PM +0000, Krishna Reddy wrote:
> Hi Jean,
> > > Why is BTM mandated for SVA? I couldn't find this requirement in 
> > > SMMU spec (Sorry if I missed it or this got discussed earlier). But 
> > > if performance is the
> > only concern here,
> > > is it better just to allow it with a warning rather than limiting 
> > > SMMUs without
> > BTM?
> >
> > It's a performance concern and requires to support multiple 
> > configurations, but the spec allows it. Are there SMMUs without BTM 
> > that need it?
> 
> The Tegra Next Generation SOC uses arm-smmu-v3, but it doesn't have support for BTM.
> Do you have plan to get your earlier patch to handle invalidate notifications into upstream sometime soon? 
> Can the dependency on BTM be relaxed with the patch?
> 
> PATCH v9 13/13] iommu/arm-smmu-v3: Hook up ATC invalidation to mm ops
> https://www.spinics.net/lists/arm-kernel/msg825099.html

This patch (which should be in 5.11) only takes care of sending ATC
invalidations to PCIe endpoints. With this, BTM is still required to
invalidate SMMU TLBs.

However we could enable command-queue invalidation when ARM_SMMU_FEAT_BTM
isn't set. Invalidations are still a relatively rare event so it may not
be outrageously slow. I can add a patch to my tree if you have hardware to
test. This could also be a first step for enabling SVA on other systems as
well, because I'm not finding time to work on BTM at the moment (requires
pinning VMIDs in KVM).

Thanks,
Jean
Krishna Reddy Dec. 14, 2020, 11:23 p.m. UTC | #8
>> The Tegra Next Generation SOC uses arm-smmu-v3, but it doesn't have support for BTM.
>> Do you have plan to get your earlier patch to handle invalidate notifications into upstream sometime soon?
>> Can the dependency on BTM be relaxed with the patch?
>>
>> PATCH v9 13/13] iommu/arm-smmu-v3: Hook up ATC invalidation to mm ops 
>> https://www.spinics.net/lists/arm-kernel/msg825099.html

>This patch (which should be in 5.11) only takes care of sending ATC invalidations to PCIe endpoints. With this, BTM is still required to invalidate SMMU TLBs.
> However we could enable command-queue invalidation when ARM_SMMU_FEAT_BTM isn't set.
> Invalidations are still a relatively rare event so it may not be outrageously slow. I can add a patch to my tree if you have hardware to test. 

Thanks! We can test the patch on emulation platform. 

-KR
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 90c08f156b43..7b14b48a26c7 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -602,6 +602,8 @@  struct arm_smmu_device {
 #define ARM_SMMU_FEAT_STALL_FORCE	(1 << 13)
 #define ARM_SMMU_FEAT_VAX		(1 << 14)
 #define ARM_SMMU_FEAT_RANGE_INV		(1 << 15)
+#define ARM_SMMU_FEAT_BTM		(1 << 16)
+#define ARM_SMMU_FEAT_SVA		(1 << 17)
 	u32				features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
@@ -683,4 +685,12 @@  int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd);
 
+#ifdef CONFIG_ARM_SMMU_V3_SVA
+bool arm_smmu_sva_supported(struct arm_smmu_device *smmu);
+#else /* CONFIG_ARM_SMMU_V3_SVA */
+static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
+{
+	return false;
+}
+#endif /* CONFIG_ARM_SMMU_V3_SVA */
 #endif /* _ARM_SMMU_V3_H */
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index ef3fcfa72187..cb94c0924196 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -152,3 +152,48 @@  static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd)
 		kfree(cd);
 	}
 }
+
+bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
+{
+	unsigned long reg, fld;
+	unsigned long oas;
+	unsigned long asid_bits;
+	u32 feat_mask = ARM_SMMU_FEAT_BTM | ARM_SMMU_FEAT_COHERENCY;
+
+	if (vabits_actual == 52)
+		feat_mask |= ARM_SMMU_FEAT_VAX;
+
+	if ((smmu->features & feat_mask) != feat_mask)
+		return false;
+
+	if (!(smmu->pgsize_bitmap & PAGE_SIZE))
+		return false;
+
+	/*
+	 * Get the smallest PA size of all CPUs (sanitized by cpufeature). We're
+	 * not even pretending to support AArch32 here. Abort if the MMU outputs
+	 * addresses larger than what we support.
+	 */
+	reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+	fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_PARANGE_SHIFT);
+	oas = id_aa64mmfr0_parange_to_phys_shift(fld);
+	if (smmu->oas < oas)
+		return false;
+
+	/* We can support bigger ASIDs than the CPU, but not smaller */
+	fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_ASID_SHIFT);
+	asid_bits = fld ? 16 : 8;
+	if (smmu->asid_bits < asid_bits)
+		return false;
+
+	/*
+	 * See max_pinned_asids in arch/arm64/mm/context.c. The following is
+	 * generally the maximum number of bindable processes.
+	 */
+	if (arm64_kernel_unmapped_at_el0())
+		asid_bits--;
+	dev_dbg(smmu->dev, "%d shared contexts\n", (1 << asid_bits) -
+		num_possible_cpus() - 2);
+
+	return true;
+}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e99ebdd4c841..44c57bcfe112 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -3257,6 +3257,9 @@  static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 
 	smmu->ias = max(smmu->ias, smmu->oas);
 
+	if (arm_smmu_sva_supported(smmu))
+		smmu->features |= ARM_SMMU_FEAT_SVA;
+
 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
 		 smmu->ias, smmu->oas, smmu->features);
 	return 0;