Message ID | 14-v2-de8b10590bf5+400-smmuv3_newapi_p1_jgg@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Update SMMUv3 to the modern iommu API (part 1/3) | expand |
Hi Jason, On 11/13/23 18:53, Jason Gunthorpe wrote: > Introducing global statics which are of type struct iommu_domain, not > struct arm_smmu_domain makes it difficult to retain > arm_smmu_master->domain, as it can no longer point to an IDENTITY or > BLOCKED domain. > > The only place that uses the value is arm_smmu_detach_dev(). Change things > to work like other drivers and call iommu_get_domain_for_dev() to obtain > the current domain. > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> This patch introduces a crash on my machine. See below. Eric [ 7.209854] list_del corruption, ffff007f82e5f890->next is NULL [ 7.216154] ------------[ cut here ]------------ [ 7.220952] kernel BUG at lib/list_debug.c:52! [ 7.225750] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP [ 7.232188] Modules linked in: [ 7.235407] CPU: 3 PID: 263 Comm: kworker/u97:1 Not tainted 6.7.0-rc2-upstream+ #11 [ 7.243598] Hardware name: FUJITSU FX700/CMUA , BIOS 1.8.0 Nov 26 2021 [ 7.251073] Workqueue: events_unbound deferred_probe_work_func [ 7.257452] pstate: 604000c9 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 7.264779] pc : __list_del_entry_valid_or_report+0x6c/0xd8 [ 7.270736] lr : __list_del_entry_valid_or_report+0x6c/0xd8 [ 7.276689] sp : ffff800089def970 [ 7.280164] x29: ffff800089def970 x28: ffff8000820f19c0 x27: ffff007f82e01248 [ 7.287869] x26: ffff007f80030f00 x25: ffff007f82e01268 x24: ffff007f82e00900 [ 7.295372] x23: ffff800082580cf8 x22: 0000000000000000 x21: 0000000000000000 [ 7.303075] x20: ffff007f82e009d8 x19: ffff007f82e5f880 x18: ffffffffffffffff [ 7.310773] x17: 0000000000000001 x16: 0000000000000040 x15: ffff800109def597 [ 7.318254] x14: 0000000000000000 x13: 4c4c554e20736920 x12: 7478656e3e2d3039 [ 7.325956] x11: 00000000ffff7fff x10: 00000000ffff7fff x9 : ffff80008012cee4 [ 7.333463] x8 : 00000000000bffe8 x7 : c0000000ffff7fff x6 : 00000000002bffa8 [ 7.341161] x5 : 0000000000007fff x4 : 0000000000000000 x3 : 0000000000000000 [ 7.348636] x2 : 0000000000000000 x1 : ffff007f824bc840 x0 : 0000000000000033 [ 7.356329] Call trace: [ 7.358961] __list_del_entry_valid_or_report+0x6c/0xd8 [ 7.364556] arm_smmu_detach_dev+0x4c/0x128 [ 7.368908] arm_smmu_attach_dev+0xe0/0x580 [ 7.373475] __iommu_attach_device+0x28/0xf8 [ 7.377934] __iommu_device_set_domain+0x74/0xd8 [ 7.382900] __iommu_probe_device+0x15c/0x270 [ 7.387423] iommu_probe_device+0x20/0x60 [ 7.391819] acpi_dma_configure_id+0xc4/0x150 [ 7.396365] pci_dma_configure+0xe8/0xf8 [ 7.400671] really_probe+0x78/0x3d0 [ 7.404412] __driver_probe_device+0x80/0x178 [ 7.409153] driver_probe_device+0x44/0x120 [ 7.413499] __device_attach_driver+0xb8/0x158 [ 7.418369] bus_for_each_drv+0x84/0xe8 [ 7.422387] __device_attach+0xac/0x1e0 [ 7.426407] device_initial_probe+0x18/0x28 [ 7.430946] bus_probe_device+0xa8/0xb8 [ 7.434942] deferred_probe_work_func+0xb8/0x110 [ 7.439918] process_one_work+0x174/0x3c8 [ 7.444110] worker_thread+0x2c8/0x3e0 [ 7.448222] kthread+0x100/0x110 [ 7.451616] ret_from_fork+0x10/0x20 [ 7.455386] Code: d65f03c0 b00062a0 910d2000 97ec9112 (d4210000) [ 7.461832] ---[ end trace 0000000000000000 ]--- [ 7.466827] Kernel panic - not syncing: Oops - BUG: Fatal exception [ 7.473439] SMP: stopping secondary CPUs [ 7.477729] Kernel Offset: disabled [ 7.481399] CPU features: 0x0,00000001,90028144,21017203 [ 7.487061] Memory Limit: none [ 7.490280] ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]--- > --- > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 21 +++++++-------------- > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 - > 2 files changed, 7 insertions(+), 15 deletions(-) > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > index 7d2dd3ea47ab68..23dda64722ea17 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > @@ -2480,19 +2480,20 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) > > static void arm_smmu_detach_dev(struct arm_smmu_master *master) > { > + struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev); > + struct arm_smmu_domain *smmu_domain; > unsigned long flags; > - struct arm_smmu_domain *smmu_domain = master->domain; > > - if (!smmu_domain) > + if (!domain) > return; > > + smmu_domain = to_smmu_domain(domain); > arm_smmu_disable_ats(master, smmu_domain); > > spin_lock_irqsave(&smmu_domain->devices_lock, flags); > list_del(&master->domain_head); > spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); > > - master->domain = NULL; > master->ats_enabled = false; > } > > @@ -2546,8 +2547,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) > > arm_smmu_detach_dev(master); > > - master->domain = smmu_domain; > - > /* > * The SMMU does not support enabling ATS with bypass. When the STE is > * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and > @@ -2566,10 +2565,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) > case ARM_SMMU_DOMAIN_S1: > if (!master->cd_table.cdtab) { > ret = arm_smmu_alloc_cd_tables(master); > - if (ret) { > - master->domain = NULL; > + if (ret) > goto out_list_del; > - } > } else { > /* > * arm_smmu_write_ctx_desc() relies on the entry being > @@ -2577,17 +2574,13 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) > */ > ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, > NULL); > - if (ret) { > - master->domain = NULL; > + if (ret) > goto out_list_del; > - } > } > > ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd); > - if (ret) { > - master->domain = NULL; > + if (ret) > goto out_list_del; > - } > > arm_smmu_make_cdtable_ste(&target, master, &master->cd_table); > arm_smmu_install_ste_for_dev(master, &target); > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > index 1be0c1151c50c3..21f2f73501019a 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > @@ -695,7 +695,6 @@ struct arm_smmu_stream { > struct arm_smmu_master { > struct arm_smmu_device *smmu; > struct device *dev; > - struct arm_smmu_domain *domain; > struct list_head domain_head; > struct arm_smmu_stream *streams; > /* Locked by the iommu core using the group mutex */
On Mon, Nov 27, 2023 at 06:14:30PM +0100, Eric Auger wrote: > Hi Jason, > > On 11/13/23 18:53, Jason Gunthorpe wrote: > > Introducing global statics which are of type struct iommu_domain, not > > struct arm_smmu_domain makes it difficult to retain > > arm_smmu_master->domain, as it can no longer point to an IDENTITY or > > BLOCKED domain. > > > > The only place that uses the value is arm_smmu_detach_dev(). Change things > > to work like other drivers and call iommu_get_domain_for_dev() to obtain > > the current domain. > > > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > > This patch introduces a crash on my machine. See below. Ah, your system must have multi-device groups The master->domain was subtly protecting the domain_head to ensure we don't touch it unless it is already in a domain list. This issue is solved in part 2 (iommu/arm-smmu-v3: Make smmu_domain->devices into an allocated list) which removes the domain_head. This hunk should fix this patch. I updated the github diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 23dda64722ea17..102e13b65bcdec 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2491,7 +2491,7 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) arm_smmu_disable_ats(master, smmu_domain); spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_del(&master->domain_head); + list_del_init(&master->domain_head); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); master->ats_enabled = false; @@ -2606,7 +2606,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) out_list_del: spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_del(&master->domain_head); + list_del_init(&master->domain_head); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); out_unlock: @@ -2810,6 +2810,7 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) master->dev = dev; master->smmu = smmu; INIT_LIST_HEAD(&master->bonds); + INIT_LIST_HEAD(&master->domain_head); dev_iommu_priv_set(dev, master); ret = arm_smmu_insert_master(smmu, master); Thank!! Jason
On Mon, Nov 13, 2023 at 01:53:21PM -0400, Jason Gunthorpe wrote: > Introducing global statics which are of type struct iommu_domain, not > struct arm_smmu_domain makes it difficult to retain > arm_smmu_master->domain, as it can no longer point to an IDENTITY or > BLOCKED domain. > > The only place that uses the value is arm_smmu_detach_dev(). Change things > to work like other drivers and call iommu_get_domain_for_dev() to obtain > the current domain. > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> The new version on the github looks good to me. Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Hi Jason, On 11/30/23 13:03, Jason Gunthorpe wrote: > On Mon, Nov 27, 2023 at 06:14:30PM +0100, Eric Auger wrote: >> Hi Jason, >> >> On 11/13/23 18:53, Jason Gunthorpe wrote: >>> Introducing global statics which are of type struct iommu_domain, not >>> struct arm_smmu_domain makes it difficult to retain >>> arm_smmu_master->domain, as it can no longer point to an IDENTITY or >>> BLOCKED domain. >>> >>> The only place that uses the value is arm_smmu_detach_dev(). Change things >>> to work like other drivers and call iommu_get_domain_for_dev() to obtain >>> the current domain. >>> >>> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> >> >> This patch introduces a crash on my machine. See below. > > Ah, your system must have multi-device groups > > The master->domain was subtly protecting the domain_head to ensure > we don't touch it unless it is already in a domain list. This issue is > solved in part 2 (iommu/arm-smmu-v3: Make smmu_domain->devices into an > allocated list) which removes the domain_head. > > This hunk should fix this patch. I updated the github I confirm that the hunk hereafter fixes the crash. Thanks Eric > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > index 23dda64722ea17..102e13b65bcdec 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > @@ -2491,7 +2491,7 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) > arm_smmu_disable_ats(master, smmu_domain); > > spin_lock_irqsave(&smmu_domain->devices_lock, flags); > - list_del(&master->domain_head); > + list_del_init(&master->domain_head); > spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); > > master->ats_enabled = false; > @@ -2606,7 +2606,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) > > out_list_del: > spin_lock_irqsave(&smmu_domain->devices_lock, flags); > - list_del(&master->domain_head); > + list_del_init(&master->domain_head); > spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); > > out_unlock: > @@ -2810,6 +2810,7 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) > master->dev = dev; > master->smmu = smmu; > INIT_LIST_HEAD(&master->bonds); > + INIT_LIST_HEAD(&master->domain_head); > dev_iommu_priv_set(dev, master); > > ret = arm_smmu_insert_master(smmu, master); > > > > Thank!! > Jason >
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 7d2dd3ea47ab68..23dda64722ea17 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2480,19 +2480,20 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) static void arm_smmu_detach_dev(struct arm_smmu_master *master) { + struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev); + struct arm_smmu_domain *smmu_domain; unsigned long flags; - struct arm_smmu_domain *smmu_domain = master->domain; - if (!smmu_domain) + if (!domain) return; + smmu_domain = to_smmu_domain(domain); arm_smmu_disable_ats(master, smmu_domain); spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_del(&master->domain_head); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - master->domain = NULL; master->ats_enabled = false; } @@ -2546,8 +2547,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) arm_smmu_detach_dev(master); - master->domain = smmu_domain; - /* * The SMMU does not support enabling ATS with bypass. When the STE is * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and @@ -2566,10 +2565,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) case ARM_SMMU_DOMAIN_S1: if (!master->cd_table.cdtab) { ret = arm_smmu_alloc_cd_tables(master); - if (ret) { - master->domain = NULL; + if (ret) goto out_list_del; - } } else { /* * arm_smmu_write_ctx_desc() relies on the entry being @@ -2577,17 +2574,13 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) */ ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL); - if (ret) { - master->domain = NULL; + if (ret) goto out_list_del; - } } ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd); - if (ret) { - master->domain = NULL; + if (ret) goto out_list_del; - } arm_smmu_make_cdtable_ste(&target, master, &master->cd_table); arm_smmu_install_ste_for_dev(master, &target); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 1be0c1151c50c3..21f2f73501019a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -695,7 +695,6 @@ struct arm_smmu_stream { struct arm_smmu_master { struct arm_smmu_device *smmu; struct device *dev; - struct arm_smmu_domain *domain; struct list_head domain_head; struct arm_smmu_stream *streams; /* Locked by the iommu core using the group mutex */
Introducing global statics which are of type struct iommu_domain, not struct arm_smmu_domain makes it difficult to retain arm_smmu_master->domain, as it can no longer point to an IDENTITY or BLOCKED domain. The only place that uses the value is arm_smmu_detach_dev(). Change things to work like other drivers and call iommu_get_domain_for_dev() to obtain the current domain. Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 21 +++++++-------------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 - 2 files changed, 7 insertions(+), 15 deletions(-)