Message ID | 20220304082350.30069-1-quic_jinlmao@quicinc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] coresight: core: Fix coresight device probe failure issue | expand |
Hi Jinlong On 04/03/2022 08:23, Mao Jinlong wrote: > It is possibe that probe failure issue happens when the device > and its child_device's probe happens at the same time. > In coresight_make_links, has_conns_grp is true for parent, but > has_conns_grp is false for child device as has_conns_grp is set > to true in coresight_create_conns_sysfs_group. The probe of parent > device will fail at this condition. Add has_conns_grp check for > child device before make the links and make the process from > device_register to connection_create be atomic to avoid this > probe failure issue. > > Suggested-by: Suzuki K Poulose <suzuki.poulose@arm.com> > Suggested-by: Mike Leach <mike.leach@linaro.org> > Signed-off-by: Mao Jinlong <quic_jinlmao@quicinc.com> > --- > drivers/hwtracing/coresight/coresight-core.c | 12 ++++++------ > 1 file changed, 6 insertions(+), 6 deletions(-) > > diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c > index 88653d1c06a4..b3e3bc59c09b 100644 > --- a/drivers/hwtracing/coresight/coresight-core.c > +++ b/drivers/hwtracing/coresight/coresight-core.c > @@ -1382,7 +1382,7 @@ static int coresight_fixup_device_conns(struct coresight_device *csdev) > continue; > conn->child_dev = > coresight_find_csdev_by_fwnode(conn->child_fwnode); > - if (conn->child_dev) { > + if (conn->child_dev && conn->child_dev->has_conns_grp) { > ret = coresight_make_links(csdev, conn, > conn->child_dev); > if (ret) > @@ -1594,7 +1594,8 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) > refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL); > if (!refcnts) { > ret = -ENOMEM; > - goto err_free_csdev; > + kfree(csdev); > + goto err_out; > } > > csdev->refcnt = refcnts; > @@ -1619,8 +1620,10 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) > csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev)); > dev_set_name(&csdev->dev, "%s", desc->name); > > + mutex_lock(&coresight_mutex); > ret = device_register(&csdev->dev); > if (ret) { > + mutex_unlock(&coresight_mutex); > put_device(&csdev->dev); > /* > * All resources are free'd explicitly via > @@ -1634,6 +1637,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) > ret = etm_perf_add_symlink_sink(csdev); > > if (ret) { > + mutex_unlock(&coresight_mutex); > device_unregister(&csdev->dev); > /* > * As with the above, all resources are free'd > @@ -1645,8 +1649,6 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) > } > } > > - mutex_lock(&coresight_mutex); > - > ret = coresight_create_conns_sysfs_group(csdev); > if (!ret) > ret = coresight_fixup_device_conns(csdev); > @@ -1663,8 +1665,6 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) > > return csdev; > > -err_free_csdev: > - kfree(csdev); > err_out: > /* Cleanup the connection information */ > coresight_release_platform_data(NULL, desc->pdata); Could we consolidate the unlock sequence to a single point with something like this (untested): diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index af00dca8d1ac..198ee140c6e6 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -1571,6 +1571,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) int nr_refcnts = 1; atomic_t *refcnts = NULL; struct coresight_device *csdev; + bool registered = false; csdev = kzalloc(sizeof(*csdev), GFP_KERNEL); if (!csdev) { @@ -1591,7 +1592,8 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL); if (!refcnts) { ret = -ENOMEM; - goto err_free_csdev; + kfree(csdev); + goto err_out; } csdev->refcnt = refcnts; @@ -1616,6 +1618,13 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev)); dev_set_name(&csdev->dev, "%s", desc->name); + /* + * Make sure the device registration and the connection fixup + * are synchronised, so that we don't see uninitialised devices + * on the coresight bus while trying to resolve the connections. + */ + mutex_lock(&coresight_mutex); + ret = device_register(&csdev->dev); if (ret) { put_device(&csdev->dev); @@ -1623,7 +1632,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) * All resources are free'd explicitly via * coresight_device_release(), triggered from put_device(). */ - goto err_out; + goto out_unlock; } if (csdev->type == CORESIGHT_DEV_TYPE_SINK || @@ -1638,11 +1647,11 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) * from put_device(), which is in turn called from * function device_unregister(). */ - goto err_out; + goto out_unlock; } } - - mutex_lock(&coresight_mutex); + /* Device is now registered */ + registered = true; ret = coresight_create_conns_sysfs_group(csdev); if (!ret) @@ -1652,16 +1661,15 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) if (!ret && cti_assoc_ops && cti_assoc_ops->add) cti_assoc_ops->add(csdev); +out_unlock: mutex_unlock(&coresight_mutex); - if (ret) { + /* Success */ + if (!ret) + return csdev; + + /* Unregister the device if needed */ + if (registered) coresight_unregister(csdev); - return ERR_PTR(ret); - } - - return csdev; - -err_free_csdev: - kfree(csdev); err_out: /* Cleanup the connection information */ coresight_release_platform_data(NULL, desc->pdata);
On 08/03/2022 11:56, Suzuki K Poulose wrote: > Hi Jinlong > > On 04/03/2022 08:23, Mao Jinlong wrote: >> It is possibe that probe failure issue happens when the device >> and its child_device's probe happens at the same time. >> In coresight_make_links, has_conns_grp is true for parent, but >> has_conns_grp is false for child device as has_conns_grp is set >> to true in coresight_create_conns_sysfs_group. The probe of parent >> device will fail at this condition. Add has_conns_grp check for >> child device before make the links and make the process from >> device_register to connection_create be atomic to avoid this >> probe failure issue. >> >> Suggested-by: Suzuki K Poulose <suzuki.poulose@arm.com> >> Suggested-by: Mike Leach <mike.leach@linaro.org> >> Signed-off-by: Mao Jinlong <quic_jinlmao@quicinc.com> >> --- >> drivers/hwtracing/coresight/coresight-core.c | 12 ++++++------ >> 1 file changed, 6 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/hwtracing/coresight/coresight-core.c >> b/drivers/hwtracing/coresight/coresight-core.c >> index 88653d1c06a4..b3e3bc59c09b 100644 >> --- a/drivers/hwtracing/coresight/coresight-core.c >> +++ b/drivers/hwtracing/coresight/coresight-core.c >> @@ -1382,7 +1382,7 @@ static int coresight_fixup_device_conns(struct >> coresight_device *csdev) >> continue; >> conn->child_dev = >> coresight_find_csdev_by_fwnode(conn->child_fwnode); >> - if (conn->child_dev) { >> + if (conn->child_dev && conn->child_dev->has_conns_grp) { >> ret = coresight_make_links(csdev, conn, >> conn->child_dev); >> if (ret) >> @@ -1594,7 +1594,8 @@ struct coresight_device >> *coresight_register(struct coresight_desc *desc) >> refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL); >> if (!refcnts) { >> ret = -ENOMEM; >> - goto err_free_csdev; >> + kfree(csdev); >> + goto err_out; >> } >> csdev->refcnt = refcnts; >> @@ -1619,8 +1620,10 @@ struct coresight_device >> *coresight_register(struct coresight_desc *desc) >> csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev)); >> dev_set_name(&csdev->dev, "%s", desc->name); >> + mutex_lock(&coresight_mutex); >> ret = device_register(&csdev->dev); >> if (ret) { >> + mutex_unlock(&coresight_mutex); >> put_device(&csdev->dev); >> /* >> * All resources are free'd explicitly via >> @@ -1634,6 +1637,7 @@ struct coresight_device >> *coresight_register(struct coresight_desc *desc) >> ret = etm_perf_add_symlink_sink(csdev); >> if (ret) { >> + mutex_unlock(&coresight_mutex); >> device_unregister(&csdev->dev); >> /* >> * As with the above, all resources are free'd >> @@ -1645,8 +1649,6 @@ struct coresight_device >> *coresight_register(struct coresight_desc *desc) >> } >> } >> - mutex_lock(&coresight_mutex); >> - >> ret = coresight_create_conns_sysfs_group(csdev); >> if (!ret) >> ret = coresight_fixup_device_conns(csdev); >> @@ -1663,8 +1665,6 @@ struct coresight_device >> *coresight_register(struct coresight_desc *desc) >> return csdev; >> -err_free_csdev: >> - kfree(csdev); >> err_out: >> /* Cleanup the connection information */ >> coresight_release_platform_data(NULL, desc->pdata); > > Could we consolidate the unlock sequence to a single point with > something like this (untested): > > > diff --git a/drivers/hwtracing/coresight/coresight-core.c > b/drivers/hwtracing/coresight/coresight-core.c > index af00dca8d1ac..198ee140c6e6 100644 > --- a/drivers/hwtracing/coresight/coresight-core.c > +++ b/drivers/hwtracing/coresight/coresight-core.c > @@ -1571,6 +1571,7 @@ struct coresight_device *coresight_register(struct > coresight_desc *desc) > int nr_refcnts = 1; > atomic_t *refcnts = NULL; > struct coresight_device *csdev; > + bool registered = false; > > csdev = kzalloc(sizeof(*csdev), GFP_KERNEL); > if (!csdev) { > @@ -1591,7 +1592,8 @@ struct coresight_device *coresight_register(struct > coresight_desc *desc) > refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL); > if (!refcnts) { > ret = -ENOMEM; > - goto err_free_csdev; > + kfree(csdev); > + goto err_out; > } > > csdev->refcnt = refcnts; > @@ -1616,6 +1618,13 @@ struct coresight_device > *coresight_register(struct coresight_desc *desc) > csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev)); > dev_set_name(&csdev->dev, "%s", desc->name); > > + /* > + * Make sure the device registration and the connection fixup > + * are synchronised, so that we don't see uninitialised devices > + * on the coresight bus while trying to resolve the connections. > + */ > + mutex_lock(&coresight_mutex); > + > ret = device_register(&csdev->dev); > if (ret) { > put_device(&csdev->dev); > @@ -1623,7 +1632,7 @@ struct coresight_device *coresight_register(struct > coresight_desc *desc) > * All resources are free'd explicitly via > * coresight_device_release(), triggered from put_device(). > */ > - goto err_out; > + goto out_unlock; > } > > if (csdev->type == CORESIGHT_DEV_TYPE_SINK || > @@ -1638,11 +1647,11 @@ struct coresight_device > *coresight_register(struct coresight_desc *desc) > * from put_device(), which is in turn called from > * function device_unregister(). > */ > - goto err_out; > + goto out_unlock; > } > } > - > - mutex_lock(&coresight_mutex); > + /* Device is now registered */ > + registered = true; > > ret = coresight_create_conns_sysfs_group(csdev); > if (!ret) > @@ -1652,16 +1661,15 @@ struct coresight_device > *coresight_register(struct coresight_desc *desc) > if (!ret && cti_assoc_ops && cti_assoc_ops->add) > cti_assoc_ops->add(csdev); > > +out_unlock: > mutex_unlock(&coresight_mutex); > - if (ret) { > + /* Success */ > + if (!ret) > + return csdev; > + > + /* Unregister the device if needed */ > + if (registered) > coresight_unregister(csdev); > - return ERR_PTR(ret); > - } Apologies, this should be preserved : i.e, if (registered) { coresight_unregister(csdev); return ERR_PTR(ret); } Thanks Suzuki
Hi Suzuki, On 3/8/2022 9:36 PM, Suzuki K Poulose wrote: > On 08/03/2022 11:56, Suzuki K Poulose wrote: >> Hi Jinlong >> >> On 04/03/2022 08:23, Mao Jinlong wrote: >>> It is possibe that probe failure issue happens when the device >>> and its child_device's probe happens at the same time. >>> In coresight_make_links, has_conns_grp is true for parent, but >>> has_conns_grp is false for child device as has_conns_grp is set >>> to true in coresight_create_conns_sysfs_group. The probe of parent >>> device will fail at this condition. Add has_conns_grp check for >>> child device before make the links and make the process from >>> device_register to connection_create be atomic to avoid this >>> probe failure issue. >>> >>> Suggested-by: Suzuki K Poulose <suzuki.poulose@arm.com> >>> Suggested-by: Mike Leach <mike.leach@linaro.org> >>> Signed-off-by: Mao Jinlong <quic_jinlmao@quicinc.com> >>> --- >>> drivers/hwtracing/coresight/coresight-core.c | 12 ++++++------ >>> 1 file changed, 6 insertions(+), 6 deletions(-) >>> >>> diff --git a/drivers/hwtracing/coresight/coresight-core.c >>> b/drivers/hwtracing/coresight/coresight-core.c >>> index 88653d1c06a4..b3e3bc59c09b 100644 >>> --- a/drivers/hwtracing/coresight/coresight-core.c >>> +++ b/drivers/hwtracing/coresight/coresight-core.c >>> @@ -1382,7 +1382,7 @@ static int coresight_fixup_device_conns(struct >>> coresight_device *csdev) >>> continue; >>> conn->child_dev = >>> coresight_find_csdev_by_fwnode(conn->child_fwnode); >>> - if (conn->child_dev) { >>> + if (conn->child_dev && conn->child_dev->has_conns_grp) { >>> ret = coresight_make_links(csdev, conn, >>> conn->child_dev); >>> if (ret) >>> @@ -1594,7 +1594,8 @@ struct coresight_device >>> *coresight_register(struct coresight_desc *desc) >>> refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL); >>> if (!refcnts) { >>> ret = -ENOMEM; >>> - goto err_free_csdev; >>> + kfree(csdev); >>> + goto err_out; >>> } >>> csdev->refcnt = refcnts; >>> @@ -1619,8 +1620,10 @@ struct coresight_device >>> *coresight_register(struct coresight_desc *desc) >>> csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev)); >>> dev_set_name(&csdev->dev, "%s", desc->name); >>> + mutex_lock(&coresight_mutex); >>> ret = device_register(&csdev->dev); >>> if (ret) { >>> + mutex_unlock(&coresight_mutex); >>> put_device(&csdev->dev); >>> /* >>> * All resources are free'd explicitly via >>> @@ -1634,6 +1637,7 @@ struct coresight_device >>> *coresight_register(struct coresight_desc *desc) >>> ret = etm_perf_add_symlink_sink(csdev); >>> if (ret) { >>> + mutex_unlock(&coresight_mutex); >>> device_unregister(&csdev->dev); >>> /* >>> * As with the above, all resources are free'd >>> @@ -1645,8 +1649,6 @@ struct coresight_device >>> *coresight_register(struct coresight_desc *desc) >>> } >>> } >>> - mutex_lock(&coresight_mutex); >>> - >>> ret = coresight_create_conns_sysfs_group(csdev); >>> if (!ret) >>> ret = coresight_fixup_device_conns(csdev); >>> @@ -1663,8 +1665,6 @@ struct coresight_device >>> *coresight_register(struct coresight_desc *desc) >>> return csdev; >>> -err_free_csdev: >>> - kfree(csdev); >>> err_out: >>> /* Cleanup the connection information */ >>> coresight_release_platform_data(NULL, desc->pdata); >> >> Could we consolidate the unlock sequence to a single point with >> something like this (untested): >> >> >> diff --git a/drivers/hwtracing/coresight/coresight-core.c >> b/drivers/hwtracing/coresight/coresight-core.c >> index af00dca8d1ac..198ee140c6e6 100644 >> --- a/drivers/hwtracing/coresight/coresight-core.c >> +++ b/drivers/hwtracing/coresight/coresight-core.c >> @@ -1571,6 +1571,7 @@ struct coresight_device >> *coresight_register(struct coresight_desc *desc) >> int nr_refcnts = 1; >> atomic_t *refcnts = NULL; >> struct coresight_device *csdev; >> + bool registered = false; >> >> csdev = kzalloc(sizeof(*csdev), GFP_KERNEL); >> if (!csdev) { >> @@ -1591,7 +1592,8 @@ struct coresight_device >> *coresight_register(struct coresight_desc *desc) >> refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL); >> if (!refcnts) { >> ret = -ENOMEM; >> - goto err_free_csdev; >> + kfree(csdev); >> + goto err_out; >> } >> >> csdev->refcnt = refcnts; >> @@ -1616,6 +1618,13 @@ struct coresight_device >> *coresight_register(struct coresight_desc *desc) >> csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev)); >> dev_set_name(&csdev->dev, "%s", desc->name); >> >> + /* >> + * Make sure the device registration and the connection fixup >> + * are synchronised, so that we don't see uninitialised devices >> + * on the coresight bus while trying to resolve the connections. >> + */ >> + mutex_lock(&coresight_mutex); >> + >> ret = device_register(&csdev->dev); >> if (ret) { >> put_device(&csdev->dev); >> @@ -1623,7 +1632,7 @@ struct coresight_device >> *coresight_register(struct coresight_desc *desc) >> * All resources are free'd explicitly via >> * coresight_device_release(), triggered from put_device(). >> */ >> - goto err_out; >> + goto out_unlock; >> } >> >> if (csdev->type == CORESIGHT_DEV_TYPE_SINK || >> @@ -1638,11 +1647,11 @@ struct coresight_device >> *coresight_register(struct coresight_desc *desc) >> * from put_device(), which is in turn called from >> * function device_unregister(). >> */ >> - goto err_out; >> + goto out_unlock; >> } >> } >> - >> - mutex_lock(&coresight_mutex); >> + /* Device is now registered */ >> + registered = true; >> >> ret = coresight_create_conns_sysfs_group(csdev); >> if (!ret) >> @@ -1652,16 +1661,15 @@ struct coresight_device >> *coresight_register(struct coresight_desc *desc) >> if (!ret && cti_assoc_ops && cti_assoc_ops->add) >> cti_assoc_ops->add(csdev); >> >> +out_unlock: >> mutex_unlock(&coresight_mutex); >> - if (ret) { >> + /* Success */ >> + if (!ret) >> + return csdev; >> + >> + /* Unregister the device if needed */ >> + if (registered) >> coresight_unregister(csdev); >> - return ERR_PTR(ret); >> - } > > Apologies, this should be preserved : > > i.e, > if (registered) { > coresight_unregister(csdev); > return ERR_PTR(ret); > } I will test with your changes and get back to you later. Thanks Jinlong Mao > > Thanks > Suzuki
Hi Suzuki, On 3/9/2022 10:57 AM, Jinlong Mao wrote: > Hi Suzuki, > > On 3/8/2022 9:36 PM, Suzuki K Poulose wrote: >> On 08/03/2022 11:56, Suzuki K Poulose wrote: >>> Hi Jinlong >>> >>> On 04/03/2022 08:23, Mao Jinlong wrote: >>>> It is possibe that probe failure issue happens when the device >>>> and its child_device's probe happens at the same time. >>>> In coresight_make_links, has_conns_grp is true for parent, but >>>> has_conns_grp is false for child device as has_conns_grp is set >>>> to true in coresight_create_conns_sysfs_group. The probe of parent >>>> device will fail at this condition. Add has_conns_grp check for >>>> child device before make the links and make the process from >>>> device_register to connection_create be atomic to avoid this >>>> probe failure issue. >>>> >>>> Suggested-by: Suzuki K Poulose <suzuki.poulose@arm.com> >>>> Suggested-by: Mike Leach <mike.leach@linaro.org> >>>> Signed-off-by: Mao Jinlong <quic_jinlmao@quicinc.com> >>>> --- >>>> drivers/hwtracing/coresight/coresight-core.c | 12 ++++++------ >>>> 1 file changed, 6 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/drivers/hwtracing/coresight/coresight-core.c >>>> b/drivers/hwtracing/coresight/coresight-core.c >>>> index 88653d1c06a4..b3e3bc59c09b 100644 >>>> --- a/drivers/hwtracing/coresight/coresight-core.c >>>> +++ b/drivers/hwtracing/coresight/coresight-core.c >>>> @@ -1382,7 +1382,7 @@ static int >>>> coresight_fixup_device_conns(struct coresight_device *csdev) >>>> continue; >>>> conn->child_dev = >>>> coresight_find_csdev_by_fwnode(conn->child_fwnode); >>>> - if (conn->child_dev) { >>>> + if (conn->child_dev && conn->child_dev->has_conns_grp) { >>>> ret = coresight_make_links(csdev, conn, >>>> conn->child_dev); >>>> if (ret) >>>> @@ -1594,7 +1594,8 @@ struct coresight_device >>>> *coresight_register(struct coresight_desc *desc) >>>> refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL); >>>> if (!refcnts) { >>>> ret = -ENOMEM; >>>> - goto err_free_csdev; >>>> + kfree(csdev); >>>> + goto err_out; >>>> } >>>> csdev->refcnt = refcnts; >>>> @@ -1619,8 +1620,10 @@ struct coresight_device >>>> *coresight_register(struct coresight_desc *desc) >>>> csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev)); >>>> dev_set_name(&csdev->dev, "%s", desc->name); >>>> + mutex_lock(&coresight_mutex); >>>> ret = device_register(&csdev->dev); >>>> if (ret) { >>>> + mutex_unlock(&coresight_mutex); >>>> put_device(&csdev->dev); >>>> /* >>>> * All resources are free'd explicitly via >>>> @@ -1634,6 +1637,7 @@ struct coresight_device >>>> *coresight_register(struct coresight_desc *desc) >>>> ret = etm_perf_add_symlink_sink(csdev); >>>> if (ret) { >>>> + mutex_unlock(&coresight_mutex); >>>> device_unregister(&csdev->dev); >>>> /* >>>> * As with the above, all resources are free'd >>>> @@ -1645,8 +1649,6 @@ struct coresight_device >>>> *coresight_register(struct coresight_desc *desc) >>>> } >>>> } >>>> - mutex_lock(&coresight_mutex); >>>> - >>>> ret = coresight_create_conns_sysfs_group(csdev); >>>> if (!ret) >>>> ret = coresight_fixup_device_conns(csdev); >>>> @@ -1663,8 +1665,6 @@ struct coresight_device >>>> *coresight_register(struct coresight_desc *desc) >>>> return csdev; >>>> -err_free_csdev: >>>> - kfree(csdev); >>>> err_out: >>>> /* Cleanup the connection information */ >>>> coresight_release_platform_data(NULL, desc->pdata); >>> >>> Could we consolidate the unlock sequence to a single point with >>> something like this (untested): >>> >>> >>> diff --git a/drivers/hwtracing/coresight/coresight-core.c >>> b/drivers/hwtracing/coresight/coresight-core.c >>> index af00dca8d1ac..198ee140c6e6 100644 >>> --- a/drivers/hwtracing/coresight/coresight-core.c >>> +++ b/drivers/hwtracing/coresight/coresight-core.c >>> @@ -1571,6 +1571,7 @@ struct coresight_device >>> *coresight_register(struct coresight_desc *desc) >>> int nr_refcnts = 1; >>> atomic_t *refcnts = NULL; >>> struct coresight_device *csdev; >>> + bool registered = false; >>> >>> csdev = kzalloc(sizeof(*csdev), GFP_KERNEL); >>> if (!csdev) { >>> @@ -1591,7 +1592,8 @@ struct coresight_device >>> *coresight_register(struct coresight_desc *desc) >>> refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL); >>> if (!refcnts) { >>> ret = -ENOMEM; >>> - goto err_free_csdev; >>> + kfree(csdev); >>> + goto err_out; >>> } >>> >>> csdev->refcnt = refcnts; >>> @@ -1616,6 +1618,13 @@ struct coresight_device >>> *coresight_register(struct coresight_desc *desc) >>> csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev)); >>> dev_set_name(&csdev->dev, "%s", desc->name); >>> >>> + /* >>> + * Make sure the device registration and the connection fixup >>> + * are synchronised, so that we don't see uninitialised devices >>> + * on the coresight bus while trying to resolve the connections. >>> + */ >>> + mutex_lock(&coresight_mutex); >>> + >>> ret = device_register(&csdev->dev); >>> if (ret) { >>> put_device(&csdev->dev); >>> @@ -1623,7 +1632,7 @@ struct coresight_device >>> *coresight_register(struct coresight_desc *desc) >>> * All resources are free'd explicitly via >>> * coresight_device_release(), triggered from put_device(). >>> */ >>> - goto err_out; >>> + goto out_unlock; >>> } >>> >>> if (csdev->type == CORESIGHT_DEV_TYPE_SINK || >>> @@ -1638,11 +1647,11 @@ struct coresight_device >>> *coresight_register(struct coresight_desc *desc) >>> * from put_device(), which is in turn called from >>> * function device_unregister(). >>> */ >>> - goto err_out; >>> + goto out_unlock; >>> } >>> } >>> - >>> - mutex_lock(&coresight_mutex); >>> + /* Device is now registered */ >>> + registered = true; >>> >>> ret = coresight_create_conns_sysfs_group(csdev); >>> if (!ret) >>> @@ -1652,16 +1661,15 @@ struct coresight_device >>> *coresight_register(struct coresight_desc *desc) >>> if (!ret && cti_assoc_ops && cti_assoc_ops->add) >>> cti_assoc_ops->add(csdev); >>> >>> +out_unlock: >>> mutex_unlock(&coresight_mutex); >>> - if (ret) { >>> + /* Success */ >>> + if (!ret) >>> + return csdev; >>> + >>> + /* Unregister the device if needed */ >>> + if (registered) >>> coresight_unregister(csdev); >>> - return ERR_PTR(ret); >>> - } >> >> Apologies, this should be preserved : >> >> i.e, >> if (registered) { >> coresight_unregister(csdev); >> return ERR_PTR(ret); >> } I tested with your changes. No probe failure during reboot stress test. I submit "[PATCH v3] coresight: core: Fix coresight device probe failure issue". Please help to review. Thanks Jinlong Mao > I will test with your changes and get back to you later. > > Thanks > Jinlong Mao >> >> Thanks >> Suzuki
diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index 88653d1c06a4..b3e3bc59c09b 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -1382,7 +1382,7 @@ static int coresight_fixup_device_conns(struct coresight_device *csdev) continue; conn->child_dev = coresight_find_csdev_by_fwnode(conn->child_fwnode); - if (conn->child_dev) { + if (conn->child_dev && conn->child_dev->has_conns_grp) { ret = coresight_make_links(csdev, conn, conn->child_dev); if (ret) @@ -1594,7 +1594,8 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL); if (!refcnts) { ret = -ENOMEM; - goto err_free_csdev; + kfree(csdev); + goto err_out; } csdev->refcnt = refcnts; @@ -1619,8 +1620,10 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev)); dev_set_name(&csdev->dev, "%s", desc->name); + mutex_lock(&coresight_mutex); ret = device_register(&csdev->dev); if (ret) { + mutex_unlock(&coresight_mutex); put_device(&csdev->dev); /* * All resources are free'd explicitly via @@ -1634,6 +1637,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) ret = etm_perf_add_symlink_sink(csdev); if (ret) { + mutex_unlock(&coresight_mutex); device_unregister(&csdev->dev); /* * As with the above, all resources are free'd @@ -1645,8 +1649,6 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) } } - mutex_lock(&coresight_mutex); - ret = coresight_create_conns_sysfs_group(csdev); if (!ret) ret = coresight_fixup_device_conns(csdev); @@ -1663,8 +1665,6 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) return csdev; -err_free_csdev: - kfree(csdev); err_out: /* Cleanup the connection information */ coresight_release_platform_data(NULL, desc->pdata);
It is possibe that probe failure issue happens when the device and its child_device's probe happens at the same time. In coresight_make_links, has_conns_grp is true for parent, but has_conns_grp is false for child device as has_conns_grp is set to true in coresight_create_conns_sysfs_group. The probe of parent device will fail at this condition. Add has_conns_grp check for child device before make the links and make the process from device_register to connection_create be atomic to avoid this probe failure issue. Suggested-by: Suzuki K Poulose <suzuki.poulose@arm.com> Suggested-by: Mike Leach <mike.leach@linaro.org> Signed-off-by: Mao Jinlong <quic_jinlmao@quicinc.com> --- drivers/hwtracing/coresight/coresight-core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)