Message ID | 8f6fd78b4c4358e65e9d171d90aa4a3dac392f09.1722993435.git.nicolinc@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add Tegra241 (Grace) CMDQV Support (part 1/2) | expand |
On Tue, Aug 06, 2024 at 07:11:52PM -0700, Nicolin Chen wrote: > > -static int arm_smmu_device_acpi_probe(struct platform_device *pdev, > - struct arm_smmu_device *smmu) > +static struct arm_smmu_device * > +arm_smmu_impl_acpi_probe(struct arm_smmu_device *smmu, > + struct acpi_iort_node *node) > +{ > + /* > + * DSDT might hold some SMMU extension, so we have no option but to go > + * through the ACPI tables unconditionally. On success, this returns a > + * copy of smmu struct holding an impl pointer. Otherwise, an impl may > + * choose to return an ERR_PTR as an error out, or to return the pass- > + * in smmu pointer as a fallback to the standard SMMU. > + */ > + return arm_smmu_impl_acpi_dsdt_probe(smmu, node); > +} Lets generalize this a bit more and have the impl mechanism work for DT too.. Keep the main probe the same and add a new function after the dt/acpi steps: smmu = arm_smmu_probe_impl(smmu); if (IS_ERR(smmu)) return PTR_ERR(smmu); Which is more like: /* * Probe all the compiled in implementations. Each one checks to see if it * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which * replaces the callers. Otherwise the original is returned or ERR_PTR. * */ static struct arm_smmu_device *arm_smmu_probe_impl(struct arm_smmu_device *orig_smmu) { struct arm_smmu_device *new_smmu; int ret; new_smmu = tegra241_cmdqv_acpi_dsdt_probe(orig_smmu); if (new_smmu != ERR_PTR(-ENODEV)) goto out_new_impl; return orig_smmu; out_new_impl: if (IS_ERR(new_smmu)) return new_smmu; /* FIXME: check is this ordering OK during remove? */ ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove, new_smmu); if (ret) return ERR_PTR(ret); return new_smmu; } Easy to add new sub implementations. Provide an inline ENODEV sub in the header file for tegra241_cmdqv_acpi_dsdt_probe Add something like this to the header to get the ACPI node: static inline struct acpi_iort_node * arm_smmu_get_iort_node(struct arm_smmu_device *smmu) { return *(struct acpi_iort_node **)dev_get_platdata(smmu->dev); } Since it isn't passed down > @@ -4560,6 +4602,7 @@ static void arm_smmu_device_remove(struct platform_device *pdev) > { > struct arm_smmu_device *smmu = platform_get_drvdata(pdev); > > + arm_smmu_impl_remove(smmu); Can't call this if devm has been used to set it up, and this would be in the wrong order anyhow. Just remove it.. I guess the devm was put for this to avoid adding goto error unwind to probe? > +struct arm_smmu_impl { > + int (*device_reset)(struct arm_smmu_device *smmu); > + void (*device_remove)(struct arm_smmu_device *smmu); > + struct arm_smmu_cmdq *(*get_secondary_cmdq)(struct arm_smmu_device *smmu); > +}; Can we put the word "ops" into this struct somehow? That would be a more typically kernely name. arm_smmu_impl_ops perhaps? > struct arm_smmu_device { > struct device *dev; > + /* An SMMUv3 implementation */ The comment is self explanatory Jason
Hi Jason, I've addressed all the comments here. Two additional replies below. On Wed, Aug 14, 2024 at 06:52:46PM -0300, Jason Gunthorpe wrote: > /* > * Probe all the compiled in implementations. Each one checks to see if it > * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which > * replaces the callers. Otherwise the original is returned or ERR_PTR. > * > */ > static struct arm_smmu_device *arm_smmu_probe_impl(struct arm_smmu_device *orig_smmu) > { > struct arm_smmu_device *new_smmu; > int ret; > > new_smmu = tegra241_cmdqv_acpi_dsdt_probe(orig_smmu); > if (new_smmu != ERR_PTR(-ENODEV)) > goto out_new_impl; > return orig_smmu; > > out_new_impl: > if (IS_ERR(new_smmu)) > return new_smmu; > > /* FIXME: check is this ordering OK during remove? */ I am not able to test-verify this. At least CMDQV seems to be OK to remove after SMMU. > > @@ -4560,6 +4602,7 @@ static void arm_smmu_device_remove(struct platform_device *pdev) > > { > > struct arm_smmu_device *smmu = platform_get_drvdata(pdev); > > > > + arm_smmu_impl_remove(smmu); > > Can't call this if devm has been used to set it up, and this would be > in the wrong order anyhow. Just remove it.. I guess the devm was put > for this to avoid adding goto error unwind to probe? I got that from Will's patch, and I think so, as it does simplify the unwind routine. Thanks! Nicolin
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index e764236a9216..18d940c65e2c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -338,7 +338,12 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) { - return &smmu->cmdq; + struct arm_smmu_cmdq *cmdq = NULL; + + if (smmu->impl && smmu->impl->get_secondary_cmdq) + cmdq = smmu->impl->get_secondary_cmdq(smmu); + + return cmdq ?: &smmu->cmdq; } static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu, @@ -4044,6 +4049,14 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu) return ret; } + if (smmu->impl && smmu->impl->device_reset) { + ret = smmu->impl->device_reset(smmu); + if (ret) { + dev_err(smmu->dev, "failed to reset impl\n"); + return ret; + } + } + return 0; } @@ -4347,8 +4360,23 @@ static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); } -static int arm_smmu_device_acpi_probe(struct platform_device *pdev, - struct arm_smmu_device *smmu) +static struct arm_smmu_device * +arm_smmu_impl_acpi_probe(struct arm_smmu_device *smmu, + struct acpi_iort_node *node) +{ + /* + * DSDT might hold some SMMU extension, so we have no option but to go + * through the ACPI tables unconditionally. On success, this returns a + * copy of smmu struct holding an impl pointer. Otherwise, an impl may + * choose to return an ERR_PTR as an error out, or to return the pass- + * in smmu pointer as a fallback to the standard SMMU. + */ + return arm_smmu_impl_acpi_dsdt_probe(smmu, node); +} + +static struct arm_smmu_device * +arm_smmu_device_acpi_probe(struct platform_device *pdev, + struct arm_smmu_device *smmu) { struct acpi_iort_smmu_v3 *iort_smmu; struct device *dev = smmu->dev; @@ -4372,18 +4400,20 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, smmu->features |= ARM_SMMU_FEAT_HA; } - return 0; + return arm_smmu_impl_acpi_probe(smmu, node); } #else -static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev, - struct arm_smmu_device *smmu) +static struct arm_smmu_device * +arm_smmu_device_acpi_probe(struct platform_device *pdev, + struct arm_smmu_device *smmu) { - return -ENODEV; + return ERR_PTR(-ENODEV); } #endif -static int arm_smmu_device_dt_probe(struct platform_device *pdev, - struct arm_smmu_device *smmu) +static struct arm_smmu_device * +arm_smmu_device_dt_probe(struct platform_device *pdev, + struct arm_smmu_device *smmu) { struct device *dev = &pdev->dev; u32 cells; @@ -4401,7 +4431,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, if (of_dma_is_coherent(dev->of_node)) smmu->features |= ARM_SMMU_FEAT_COHERENCY; - return ret; + return ret ? ERR_PTR(ret) : smmu; } static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu) @@ -4453,6 +4483,14 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); } +static void arm_smmu_impl_remove(void *data) +{ + struct arm_smmu_device *smmu = data; + + if (smmu->impl && smmu->impl->device_remove) + smmu->impl->device_remove(smmu); +} + static int arm_smmu_device_probe(struct platform_device *pdev) { int irq, ret; @@ -4467,10 +4505,14 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->dev = dev; if (dev->of_node) { - ret = arm_smmu_device_dt_probe(pdev, smmu); + smmu = arm_smmu_device_dt_probe(pdev, smmu); } else { - ret = arm_smmu_device_acpi_probe(pdev, smmu); + smmu = arm_smmu_device_acpi_probe(pdev, smmu); } + if (IS_ERR(smmu)) + return PTR_ERR(smmu); + + ret = devm_add_action_or_reset(dev, arm_smmu_impl_remove, smmu); if (ret) return ret; @@ -4560,6 +4602,7 @@ static void arm_smmu_device_remove(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); + arm_smmu_impl_remove(smmu); iommu_device_unregister(&smmu->iommu); iommu_device_sysfs_remove(&smmu->iommu); arm_smmu_device_disable(smmu); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 71818f586036..38d4a84e2c82 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -14,6 +14,9 @@ #include <linux/mmzone.h> #include <linux/sizes.h> +struct arm_smmu_device; +struct acpi_iort_node; + /* MMIO registers */ #define ARM_SMMU_IDR0 0x0 #define IDR0_ST_LVL GENMASK(28, 27) @@ -627,9 +630,25 @@ struct arm_smmu_strtab_cfg { u32 strtab_base_cfg; }; +struct arm_smmu_impl { + int (*device_reset)(struct arm_smmu_device *smmu); + void (*device_remove)(struct arm_smmu_device *smmu); + struct arm_smmu_cmdq *(*get_secondary_cmdq)(struct arm_smmu_device *smmu); +}; + +static inline struct arm_smmu_device * +arm_smmu_impl_acpi_dsdt_probe(struct arm_smmu_device *smmu, + struct acpi_iort_node *node) +{ + return smmu; +} + /* An SMMUv3 instance */ struct arm_smmu_device { struct device *dev; + /* An SMMUv3 implementation */ + const struct arm_smmu_impl *impl; + void __iomem *base; void __iomem *page1;
NVIDIA Tegra241 implemented SMMU in a slightly different way that supports a CMDQV extension feature as a secondary CMDQ for virtualization cases. Mimicing the arm-smmu (v2) driver, introduce a new struct arm_smmu_impl to accommodate impl routines. Suggested-by: Will Deacon <will@kernel.org> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 67 +++++++++++++++++---- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 19 ++++++ 2 files changed, 74 insertions(+), 12 deletions(-)