Message ID | 6-v2-b0417f84403e+11f-iommu_group_locking_jgg@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Refine the locking for dev->iommu_group | expand |
Hi Jason, On 31.07.2023 19:50, Jason Gunthorpe wrote: > Use the new helper. > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > --- > drivers/iommu/rockchip-iommu.c | 22 ++-------------------- > 1 file changed, 2 insertions(+), 20 deletions(-) After applying your recent fixes from "[PATCH 0/3] Fix device_lock deadlock on two probe() paths" thread I've decided to run more tests on all boards I have. This way I found that this patch breaks booting of Odroid-M1 board, which is ARM64 system based on Rockchip RK3568 (arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts). Here is the log: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000028 Mem abort info: ... Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP Modules linked in: hantro_vpu(+) crct10dif_ce rockchip_saradc v4l2_vp9 industrialio_triggered_buffer v4l2_h264 kfifo_buf display_connector dwmac_rk(+) v4l2_mem2mem snd_soc_simple_card stmmac_platform videobuf2_dma_contig snd_soc_simple_card_utils videobuf2_memops rockchip_thermal videobuf2_v4l2 stmmac videodev pcs_xpcs snd_soc_rockchip_i2s_tdm videobuf2_common rtc_rk808 mc panfrost rockchipdrm drm_shmem_helper gpu_sched analogix_dp dw_mipi_dsi dw_hdmi drm_display_helper ip_tables x_tables ipv6 CPU: 2 PID: 147 Comm: systemd-udevd Not tainted 6.5.0-rc1+ #13826 Hardware name: Hardkernel ODROID-M1 (DT) pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : generic_single_device_group+0x24/0x88 lr : generic_single_device_group+0x5c/0x88 ... Call trace: generic_single_device_group+0x24/0x88 __iommu_probe_device+0xe8/0x444 iommu_probe_device+0x1c/0x54 of_iommu_configure+0x10c/0x200 of_dma_configure_id+0x1e0/0x3b4 platform_dma_configure+0x30/0x78 really_probe+0x70/0x2b4 __driver_probe_device+0x78/0x12c driver_probe_device+0x3c/0x160 __driver_attach+0x9c/0x1ac bus_for_each_dev+0x74/0xd4 driver_attach+0x24/0x30 bus_add_driver+0xe4/0x1e8 driver_register+0x60/0x128 __platform_driver_register+0x28/0x34 hantro_driver_init+0x20/0x1000 [hantro_vpu] do_one_initcall+0x74/0x2f0 do_init_module+0x58/0x1ec load_module+0x1a20/0x1c64 init_module_from_file+0x84/0xc0 idempotent_init_module+0x180/0x250 __arm64_sys_finit_module+0x64/0xa0 invoke_syscall+0x48/0x114 el0_svc_common.constprop.0+0xec/0x10c do_el0_svc+0x38/0xa4 el0_svc+0x40/0xac el0t_64_sync_handler+0xc0/0xc4 el0t_64_sync+0x190/0x194 ---[ end trace 0000000000000000 ]--- It turns out that iommu pointer gathered from dev->iommu->iommu_dev is NULL in generic_single_device_group() in the above call stack. Reverting $subject on top of linux-next+"[PATCH 0/3] Fix device_lock deadlock on two probe() paths" fixes booting. > diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c > index 8ff69fbf9f65db..91f13cc9411548 100644 > --- a/drivers/iommu/rockchip-iommu.c > +++ b/drivers/iommu/rockchip-iommu.c > @@ -113,7 +113,6 @@ struct rk_iommu { > struct iommu_device iommu; > struct list_head node; /* entry in rk_iommu_domain.iommus */ > struct iommu_domain *domain; /* domain to which iommu is attached */ > - struct iommu_group *group; > }; > > struct rk_iommudata { > @@ -1155,15 +1154,6 @@ static void rk_iommu_release_device(struct device *dev) > device_link_del(data->link); > } > > -static struct iommu_group *rk_iommu_device_group(struct device *dev) > -{ > - struct rk_iommu *iommu; > - > - iommu = rk_iommu_from_dev(dev); > - > - return iommu_group_ref_get(iommu->group); > -} > - > static int rk_iommu_of_xlate(struct device *dev, > struct of_phandle_args *args) > { > @@ -1189,7 +1179,7 @@ static const struct iommu_ops rk_iommu_ops = { > .domain_alloc = rk_iommu_domain_alloc, > .probe_device = rk_iommu_probe_device, > .release_device = rk_iommu_release_device, > - .device_group = rk_iommu_device_group, > + .device_group = generic_single_device_group, > #ifdef CONFIG_ARM > .set_platform_dma_ops = rk_iommu_set_platform_dma, > #endif > @@ -1280,15 +1270,9 @@ static int rk_iommu_probe(struct platform_device *pdev) > if (err) > return err; > > - iommu->group = iommu_group_alloc(); > - if (IS_ERR(iommu->group)) { > - err = PTR_ERR(iommu->group); > - goto err_unprepare_clocks; > - } > - > err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); > if (err) > - goto err_put_group; > + goto err_unprepare_clocks; > > err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev); > if (err) > @@ -1325,8 +1309,6 @@ static int rk_iommu_probe(struct platform_device *pdev) > pm_runtime_disable(dev); > err_remove_sysfs: > iommu_device_sysfs_remove(&iommu->iommu); > -err_put_group: > - iommu_group_put(iommu->group); > err_unprepare_clocks: > clk_bulk_unprepare(iommu->num_clocks, iommu->clocks); > return err; Best regards
On Wed, Aug 09, 2023 at 03:19:34PM +0200, Marek Szyprowski wrote: > Hi Jason, > > On 31.07.2023 19:50, Jason Gunthorpe wrote: > > Use the new helper. > > > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > > --- > > drivers/iommu/rockchip-iommu.c | 22 ++-------------------- > > 1 file changed, 2 insertions(+), 20 deletions(-) > > After applying your recent fixes from "[PATCH 0/3] Fix device_lock > deadlock on two probe() paths" thread I've decided to run more tests on > all boards I have. This way I found that this patch breaks booting of > Odroid-M1 board, which is ARM64 system based on Rockchip RK3568 > (arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts). Here is the log: Is this why? diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 8842f4975ec4a8..8677d3ace47bbe 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -366,6 +366,7 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) ret = PTR_ERR(iommu_dev); goto err_module_put; } + dev->iommu->iommu_dev = iommu_dev; ret = iommu_device_link(iommu_dev, dev); if (ret) @@ -383,7 +384,6 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) dev->iommu_group = group; mutex_unlock(&dev_iommu_group_lock); - dev->iommu->iommu_dev = iommu_dev; dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); if (ops->is_attach_deferred) dev->iommu->attach_deferred = ops->is_attach_deferred(dev); @@ -397,6 +397,7 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) err_module_put: module_put(ops->owner); err_free: + dev->iommu->iommu_dev = NULL; dev_iommu_free(dev); return ret; } Thanks, Jason
Hi Jason, On 09.08.2023 15:51, Jason Gunthorpe wrote: > On Wed, Aug 09, 2023 at 03:19:34PM +0200, Marek Szyprowski wrote: >> On 31.07.2023 19:50, Jason Gunthorpe wrote: >>> Use the new helper. >>> >>> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> >>> --- >>> drivers/iommu/rockchip-iommu.c | 22 ++-------------------- >>> 1 file changed, 2 insertions(+), 20 deletions(-) >> After applying your recent fixes from "[PATCH 0/3] Fix device_lock >> deadlock on two probe() paths" thread I've decided to run more tests on >> all boards I have. This way I found that this patch breaks booting of >> Odroid-M1 board, which is ARM64 system based on Rockchip RK3568 >> (arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts). Here is the log: > Is this why? Right, this fixed the issue. Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c > index 8842f4975ec4a8..8677d3ace47bbe 100644 > --- a/drivers/iommu/iommu.c > +++ b/drivers/iommu/iommu.c > @@ -366,6 +366,7 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) > ret = PTR_ERR(iommu_dev); > goto err_module_put; > } > + dev->iommu->iommu_dev = iommu_dev; > > ret = iommu_device_link(iommu_dev, dev); > if (ret) > @@ -383,7 +384,6 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) > dev->iommu_group = group; > mutex_unlock(&dev_iommu_group_lock); > > - dev->iommu->iommu_dev = iommu_dev; > dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); > if (ops->is_attach_deferred) > dev->iommu->attach_deferred = ops->is_attach_deferred(dev); > @@ -397,6 +397,7 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) > err_module_put: > module_put(ops->owner); > err_free: > + dev->iommu->iommu_dev = NULL; > dev_iommu_free(dev); > return ret; > } > > Thanks, > Jason > Best regards
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 8ff69fbf9f65db..91f13cc9411548 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -113,7 +113,6 @@ struct rk_iommu { struct iommu_device iommu; struct list_head node; /* entry in rk_iommu_domain.iommus */ struct iommu_domain *domain; /* domain to which iommu is attached */ - struct iommu_group *group; }; struct rk_iommudata { @@ -1155,15 +1154,6 @@ static void rk_iommu_release_device(struct device *dev) device_link_del(data->link); } -static struct iommu_group *rk_iommu_device_group(struct device *dev) -{ - struct rk_iommu *iommu; - - iommu = rk_iommu_from_dev(dev); - - return iommu_group_ref_get(iommu->group); -} - static int rk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) { @@ -1189,7 +1179,7 @@ static const struct iommu_ops rk_iommu_ops = { .domain_alloc = rk_iommu_domain_alloc, .probe_device = rk_iommu_probe_device, .release_device = rk_iommu_release_device, - .device_group = rk_iommu_device_group, + .device_group = generic_single_device_group, #ifdef CONFIG_ARM .set_platform_dma_ops = rk_iommu_set_platform_dma, #endif @@ -1280,15 +1270,9 @@ static int rk_iommu_probe(struct platform_device *pdev) if (err) return err; - iommu->group = iommu_group_alloc(); - if (IS_ERR(iommu->group)) { - err = PTR_ERR(iommu->group); - goto err_unprepare_clocks; - } - err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); if (err) - goto err_put_group; + goto err_unprepare_clocks; err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev); if (err) @@ -1325,8 +1309,6 @@ static int rk_iommu_probe(struct platform_device *pdev) pm_runtime_disable(dev); err_remove_sysfs: iommu_device_sysfs_remove(&iommu->iommu); -err_put_group: - iommu_group_put(iommu->group); err_unprepare_clocks: clk_bulk_unprepare(iommu->num_clocks, iommu->clocks); return err;
Use the new helper. Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> --- drivers/iommu/rockchip-iommu.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-)