diff mbox series

[v4,06/20] coresight: add try_get_module() in coresight_grab_device()

Message ID 20200723042802.22511-7-tingwei@codeaurora.org (mailing list archive)
State New, archived
Headers show
Series coresight: allow to build coresight as modules | expand

Commit Message

Tingwei Zhang July 23, 2020, 4:27 a.m. UTC
When coresight device is in an active session, driver module of
that device should not be removed. Use try_get_module() in
coresight_grab_device() to prevent module to be unloaded.

Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
---
 drivers/hwtracing/coresight/coresight.c | 27 +++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

Comments

Mike Leach July 23, 2020, 10:55 a.m. UTC | #1
Hi Tingwei,

On Thu, 23 Jul 2020 at 05:28, Tingwei Zhang <tingwei@codeaurora.org> wrote:
>
> When coresight device is in an active session, driver module of
> that device should not be removed. Use try_get_module() in
> coresight_grab_device() to prevent module to be unloaded.
>
> Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> ---
>  drivers/hwtracing/coresight/coresight.c | 27 +++++++++++++++++++++----
>  1 file changed, 23 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
> index b7151c5f81b1..17bc76ea86ae 100644
> --- a/drivers/hwtracing/coresight/coresight.c
> +++ b/drivers/hwtracing/coresight/coresight.c
> @@ -640,7 +640,7 @@ struct coresight_device *coresight_get_sink_by_id(u32 id)
>   * don't appear on the trace path, they should be handled along with the
>   * the master device.
>   */
> -static void coresight_grab_device(struct coresight_device *csdev)
> +static int coresight_grab_device(struct coresight_device *csdev)
>  {
>         int i;
>
> @@ -648,10 +648,25 @@ static void coresight_grab_device(struct coresight_device *csdev)
>                 struct coresight_device *child;
>
>                 child  = csdev->pdata->conns[i].child_dev;
> -               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> +               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
> +                       if (!try_module_get(child->dev.parent->driver->owner))
> +                               goto err;
>                         pm_runtime_get_sync(child->dev.parent);
> +               }
>         }
> +       if (!try_module_get(csdev->dev.parent->driver->owner))
> +               goto err;
>         pm_runtime_get_sync(csdev->dev.parent);
> +       return 0;
> +err:
> +       for (i--; i >= 0; i--) {
> +               struct coresight_device *child;
> +
> +               child  = csdev->pdata->conns[i].child_dev;
> +               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> +                       pm_runtime_put(child->dev.parent);
> +       }
> +       return -ENODEV;
>  }
>
>  /*
> @@ -663,12 +678,15 @@ static void coresight_drop_device(struct coresight_device *csdev)
>         int i;
>
>         pm_runtime_put(csdev->dev.parent);
> +       module_put(csdev->dev.parent->driver->owner);
>         for (i = 0; i < csdev->pdata->nr_outport; i++) {
>                 struct coresight_device *child;
>
>                 child  = csdev->pdata->conns[i].child_dev;
> -               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> +               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
>                         pm_runtime_put(child->dev.parent);
> +                       module_put(child->dev.parent->driver->owner);
> +               }
>         }
>  }
>
> @@ -721,7 +739,8 @@ static int _coresight_build_path(struct coresight_device *csdev,
>         if (!node)
>                 return -ENOMEM;
>
> -       coresight_grab_device(csdev);
> +       if (coresight_grab_device(csdev))
> +               return -ENODEV;
>         node->csdev = csdev;
>         list_add(&node->link, path);
>
> --
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project
>

The CTI devices are associated with other coresight components using
csdev->ect_dev; These are not handled in the main linear trace path as
these have a star topology interlinking all components.
However, when a component has an associated ect_dev then is enabled at
the same time as the other component is enabled.

So a module_get/put will be needed for this association to prevent the
CTI being removed while a trace session is active.
I suggest in coresight.c : coresight_control_assoc_ectdev()

Regards

Mike
Greg KH July 23, 2020, 11:07 a.m. UTC | #2
On Thu, Jul 23, 2020 at 12:27:48PM +0800, Tingwei Zhang wrote:
> When coresight device is in an active session, driver module of
> that device should not be removed. Use try_get_module() in
> coresight_grab_device() to prevent module to be unloaded.

Are you sure this works?  Why is it needed at all?  Why not just tear
down the children properly when a module is removed so that you don't
need this at all?

> 
> Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> ---
>  drivers/hwtracing/coresight/coresight.c | 27 +++++++++++++++++++++----
>  1 file changed, 23 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
> index b7151c5f81b1..17bc76ea86ae 100644
> --- a/drivers/hwtracing/coresight/coresight.c
> +++ b/drivers/hwtracing/coresight/coresight.c
> @@ -640,7 +640,7 @@ struct coresight_device *coresight_get_sink_by_id(u32 id)
>   * don't appear on the trace path, they should be handled along with the
>   * the master device.
>   */
> -static void coresight_grab_device(struct coresight_device *csdev)
> +static int coresight_grab_device(struct coresight_device *csdev)
>  {
>  	int i;
>  
> @@ -648,10 +648,25 @@ static void coresight_grab_device(struct coresight_device *csdev)
>  		struct coresight_device *child;
>  
>  		child  = csdev->pdata->conns[i].child_dev;
> -		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> +		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
> +			if (!try_module_get(child->dev.parent->driver->owner))

Why the child's parent?  Why not the child itself?


> +				goto err;

What about the error given to you here?  Why throw that away?

>  			pm_runtime_get_sync(child->dev.parent);
> +		}
>  	}
> +	if (!try_module_get(csdev->dev.parent->driver->owner))
> +		goto err;

You don't reduce the child's parent's driver owner module reference
here?

Ugh, that's a horid sentence :(

>  	pm_runtime_get_sync(csdev->dev.parent);
> +	return 0;
> +err:
> +	for (i--; i >= 0; i--) {
> +		struct coresight_device *child;
> +
> +		child  = csdev->pdata->conns[i].child_dev;
> +		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> +			pm_runtime_put(child->dev.parent);
> +	}
> +	return -ENODEV;
>  }
>  
>  /*
> @@ -663,12 +678,15 @@ static void coresight_drop_device(struct coresight_device *csdev)
>  	int i;
>  
>  	pm_runtime_put(csdev->dev.parent);
> +	module_put(csdev->dev.parent->driver->owner);
>  	for (i = 0; i < csdev->pdata->nr_outport; i++) {
>  		struct coresight_device *child;
>  
>  		child  = csdev->pdata->conns[i].child_dev;
> -		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> +		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
>  			pm_runtime_put(child->dev.parent);
> +			module_put(child->dev.parent->driver->owner);
> +		}
>  	}
>  }
>  
> @@ -721,7 +739,8 @@ static int _coresight_build_path(struct coresight_device *csdev,
>  	if (!node)
>  		return -ENOMEM;
>  
> -	coresight_grab_device(csdev);
> +	if (coresight_grab_device(csdev))
> +		return -ENODEV;

Why not return the error given to you?


thanks,

greg k-h
Mathieu Poirier July 23, 2020, 6:04 p.m. UTC | #3
Hi Greg,

On Thu, Jul 23, 2020 at 01:07:07PM +0200, Greg Kroah-Hartman wrote:
> On Thu, Jul 23, 2020 at 12:27:48PM +0800, Tingwei Zhang wrote:
> > When coresight device is in an active session, driver module of
> > that device should not be removed. Use try_get_module() in
> > coresight_grab_device() to prevent module to be unloaded.
> 
> Are you sure this works?  Why is it needed at all?  Why not just tear
> down the children properly when a module is removed so that you don't
> need this at all?

Using the terms parent and child is somewhat ambiguous...  This is not a
parent-child relationship but simply an association between devices, something
like port 1 on device "parent" is connected to port 2 on device "child".  The
parent-child nomenclature was chosen to reflect that a device appears before
another in a coresight path.  Otherwise there is no other relation between
devices, hence the choice of using try_get_module()/put_module() to prevent
drivers from being taken away.  I'd be happy to proceed differently but haven't
found better options.

Going back to parent/child, we could have chosen left/right, up/down or A/B, all
of which are just as confusion. 

> 
> > 
> > Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> > ---
> >  drivers/hwtracing/coresight/coresight.c | 27 +++++++++++++++++++++----
> >  1 file changed, 23 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
> > index b7151c5f81b1..17bc76ea86ae 100644
> > --- a/drivers/hwtracing/coresight/coresight.c
> > +++ b/drivers/hwtracing/coresight/coresight.c
> > @@ -640,7 +640,7 @@ struct coresight_device *coresight_get_sink_by_id(u32 id)
> >   * don't appear on the trace path, they should be handled along with the
> >   * the master device.
> >   */
> > -static void coresight_grab_device(struct coresight_device *csdev)
> > +static int coresight_grab_device(struct coresight_device *csdev)
> >  {
> >  	int i;
> >  
> > @@ -648,10 +648,25 @@ static void coresight_grab_device(struct coresight_device *csdev)
> >  		struct coresight_device *child;
> >  
> >  		child  = csdev->pdata->conns[i].child_dev;
> > -		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > +		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
> > +			if (!try_module_get(child->dev.parent->driver->owner))
> 
> Why the child's parent?  Why not the child itself?

The device structure of each coresight_device is not associated with a driver.
It is there to take advantages of device goodies such as dev.type, dev.group,
dev.release and dev.bus.  Coresight IP blocks are discovered on the AMBA bus and as
such amba_device::dev::driver holds the driver itself.  In coresight_register()
the association coresigth::dev::parent = amba_device::dev is made.

> 
> 
> > +				goto err;
> 
> What about the error given to you here?  Why throw that away?
> 
> >  			pm_runtime_get_sync(child->dev.parent);
> > +		}
> >  	}
> > +	if (!try_module_get(csdev->dev.parent->driver->owner))
> > +		goto err;
> 
> You don't reduce the child's parent's driver owner module reference
> here?

Here @parent is referencing the current device.  Now that helper devices
connected to any of its outgoing ports have been enabled (and a reference count 
to the helper device driver incremented), a reference count to the current device
driver can also be incremented. 

I hope this clarify what is going on here.  It is a little unorthodox but so is
coresight.

Thanks,
Mathieu

> 
> Ugh, that's a horid sentence :(
> 
> >  	pm_runtime_get_sync(csdev->dev.parent);
> > +	return 0;
> > +err:
> > +	for (i--; i >= 0; i--) {
> > +		struct coresight_device *child;
> > +
> > +		child  = csdev->pdata->conns[i].child_dev;
> > +		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > +			pm_runtime_put(child->dev.parent);
> > +	}
> > +	return -ENODEV;
> >  }
> >  
> >  /*
> > @@ -663,12 +678,15 @@ static void coresight_drop_device(struct coresight_device *csdev)
> >  	int i;
> >  
> >  	pm_runtime_put(csdev->dev.parent);
> > +	module_put(csdev->dev.parent->driver->owner);
> >  	for (i = 0; i < csdev->pdata->nr_outport; i++) {
> >  		struct coresight_device *child;
> >  
> >  		child  = csdev->pdata->conns[i].child_dev;
> > -		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > +		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
> >  			pm_runtime_put(child->dev.parent);
> > +			module_put(child->dev.parent->driver->owner);
> > +		}
> >  	}
> >  }
> >  
> > @@ -721,7 +739,8 @@ static int _coresight_build_path(struct coresight_device *csdev,
> >  	if (!node)
> >  		return -ENOMEM;
> >  
> > -	coresight_grab_device(csdev);
> > +	if (coresight_grab_device(csdev))
> > +		return -ENODEV;
> 
> Why not return the error given to you?
> 
> 
> thanks,
> 
> greg k-h
Greg KH July 23, 2020, 6:18 p.m. UTC | #4
On Thu, Jul 23, 2020 at 12:04:47PM -0600, Mathieu Poirier wrote:
> Hi Greg,
> 
> On Thu, Jul 23, 2020 at 01:07:07PM +0200, Greg Kroah-Hartman wrote:
> > On Thu, Jul 23, 2020 at 12:27:48PM +0800, Tingwei Zhang wrote:
> > > When coresight device is in an active session, driver module of
> > > that device should not be removed. Use try_get_module() in
> > > coresight_grab_device() to prevent module to be unloaded.
> > 
> > Are you sure this works?  Why is it needed at all?  Why not just tear
> > down the children properly when a module is removed so that you don't
> > need this at all?
> 
> Using the terms parent and child is somewhat ambiguous...  This is not a
> parent-child relationship but simply an association between devices, something
> like port 1 on device "parent" is connected to port 2 on device "child".  The
> parent-child nomenclature was chosen to reflect that a device appears before
> another in a coresight path.  Otherwise there is no other relation between
> devices, hence the choice of using try_get_module()/put_module() to prevent
> drivers from being taken away.  I'd be happy to proceed differently but haven't
> found better options.
> 
> Going back to parent/child, we could have chosen left/right, up/down or A/B, all
> of which are just as confusion. 

Ok, thanks.

But this causes confusion for everyone as seen below:

> > > Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> > > ---
> > >  drivers/hwtracing/coresight/coresight.c | 27 +++++++++++++++++++++----
> > >  1 file changed, 23 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
> > > index b7151c5f81b1..17bc76ea86ae 100644
> > > --- a/drivers/hwtracing/coresight/coresight.c
> > > +++ b/drivers/hwtracing/coresight/coresight.c
> > > @@ -640,7 +640,7 @@ struct coresight_device *coresight_get_sink_by_id(u32 id)
> > >   * don't appear on the trace path, they should be handled along with the
> > >   * the master device.
> > >   */
> > > -static void coresight_grab_device(struct coresight_device *csdev)
> > > +static int coresight_grab_device(struct coresight_device *csdev)
> > >  {
> > >  	int i;
> > >  
> > > @@ -648,10 +648,25 @@ static void coresight_grab_device(struct coresight_device *csdev)
> > >  		struct coresight_device *child;
> > >  
> > >  		child  = csdev->pdata->conns[i].child_dev;
> > > -		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > > +		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
> > > +			if (!try_module_get(child->dev.parent->driver->owner))
> > 
> > Why the child's parent?  Why not the child itself?
> 
> The device structure of each coresight_device is not associated with a driver.
> It is there to take advantages of device goodies such as dev.type, dev.group,
> dev.release and dev.bus.  Coresight IP blocks are discovered on the AMBA bus and as
> such amba_device::dev::driver holds the driver itself.  In coresight_register()
> the association coresigth::dev::parent = amba_device::dev is made.
> 
> > 
> > 
> > > +				goto err;
> > 
> > What about the error given to you here?  Why throw that away?
> > 
> > >  			pm_runtime_get_sync(child->dev.parent);
> > > +		}
> > >  	}
> > > +	if (!try_module_get(csdev->dev.parent->driver->owner))
> > > +		goto err;
> > 
> > You don't reduce the child's parent's driver owner module reference
> > here?
> 
> Here @parent is referencing the current device.  Now that helper devices
> connected to any of its outgoing ports have been enabled (and a reference count 
> to the helper device driver incremented), a reference count to the current device
> driver can also be incremented. 

I mean the fact that your error handling does not seem to roll back the
module reference count you got up above in the other loop.

Or if it does, it's really really not obvious, and should at the very
least, be commented as to how it's all cleaning up properly.

thanks,

greg k-h
Mathieu Poirier July 23, 2020, 7:15 p.m. UTC | #5
On Thu, Jul 23, 2020 at 08:18:37PM +0200, Greg Kroah-Hartman wrote:
> On Thu, Jul 23, 2020 at 12:04:47PM -0600, Mathieu Poirier wrote:
> > Hi Greg,
> > 
> > On Thu, Jul 23, 2020 at 01:07:07PM +0200, Greg Kroah-Hartman wrote:
> > > On Thu, Jul 23, 2020 at 12:27:48PM +0800, Tingwei Zhang wrote:
> > > > When coresight device is in an active session, driver module of
> > > > that device should not be removed. Use try_get_module() in
> > > > coresight_grab_device() to prevent module to be unloaded.
> > > 
> > > Are you sure this works?  Why is it needed at all?  Why not just tear
> > > down the children properly when a module is removed so that you don't
> > > need this at all?
> > 
> > Using the terms parent and child is somewhat ambiguous...  This is not a
> > parent-child relationship but simply an association between devices, something
> > like port 1 on device "parent" is connected to port 2 on device "child".  The
> > parent-child nomenclature was chosen to reflect that a device appears before
> > another in a coresight path.  Otherwise there is no other relation between
> > devices, hence the choice of using try_get_module()/put_module() to prevent
> > drivers from being taken away.  I'd be happy to proceed differently but haven't
> > found better options.
> > 
> > Going back to parent/child, we could have chosen left/right, up/down or A/B, all
> > of which are just as confusion. 
> 
> Ok, thanks.
> 
> But this causes confusion for everyone as seen below:
> 
> > > > Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> > > > ---
> > > >  drivers/hwtracing/coresight/coresight.c | 27 +++++++++++++++++++++----
> > > >  1 file changed, 23 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
> > > > index b7151c5f81b1..17bc76ea86ae 100644
> > > > --- a/drivers/hwtracing/coresight/coresight.c
> > > > +++ b/drivers/hwtracing/coresight/coresight.c
> > > > @@ -640,7 +640,7 @@ struct coresight_device *coresight_get_sink_by_id(u32 id)
> > > >   * don't appear on the trace path, they should be handled along with the
> > > >   * the master device.
> > > >   */
> > > > -static void coresight_grab_device(struct coresight_device *csdev)
> > > > +static int coresight_grab_device(struct coresight_device *csdev)
> > > >  {
> > > >  	int i;
> > > >  
> > > > @@ -648,10 +648,25 @@ static void coresight_grab_device(struct coresight_device *csdev)
> > > >  		struct coresight_device *child;
> > > >  
> > > >  		child  = csdev->pdata->conns[i].child_dev;
> > > > -		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > > > +		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
> > > > +			if (!try_module_get(child->dev.parent->driver->owner))
> > > 
> > > Why the child's parent?  Why not the child itself?
> > 
> > The device structure of each coresight_device is not associated with a driver.
> > It is there to take advantages of device goodies such as dev.type, dev.group,
> > dev.release and dev.bus.  Coresight IP blocks are discovered on the AMBA bus and as
> > such amba_device::dev::driver holds the driver itself.  In coresight_register()
> > the association coresigth::dev::parent = amba_device::dev is made.
> > 
> > > 
> > > 
> > > > +				goto err;
> > > 
> > > What about the error given to you here?  Why throw that away?
> > > 
> > > >  			pm_runtime_get_sync(child->dev.parent);
> > > > +		}
> > > >  	}
> > > > +	if (!try_module_get(csdev->dev.parent->driver->owner))
> > > > +		goto err;
> > > 
> > > You don't reduce the child's parent's driver owner module reference
> > > here?
> > 
> > Here @parent is referencing the current device.  Now that helper devices
> > connected to any of its outgoing ports have been enabled (and a reference count 
> > to the helper device driver incremented), a reference count to the current device
> > driver can also be incremented. 
> 
> I mean the fact that your error handling does not seem to roll back the
> module reference count you got up above in the other loop.

Ah! You were talking about the error condition, while I thought you were
referring to the normal execution path.  I am in agreement with all your comments on error
handling. 

> 
> Or if it does, it's really really not obvious, and should at the very
> least, be commented as to how it's all cleaning up properly.
> 
> thanks,
> 
> greg k-h
Tingwei Zhang July 24, 2020, 12:41 a.m. UTC | #6
Fri, Jul 24, 2020 at 03:15:40AM +0800, Mathieu Poirier wrote:
> On Thu, Jul 23, 2020 at 08:18:37PM +0200, Greg Kroah-Hartman wrote:
> > On Thu, Jul 23, 2020 at 12:04:47PM -0600, Mathieu Poirier wrote:
> > > Hi Greg,
> > > 
> > > On Thu, Jul 23, 2020 at 01:07:07PM +0200, Greg Kroah-Hartman wrote:
> > > > On Thu, Jul 23, 2020 at 12:27:48PM +0800, Tingwei Zhang wrote:
> > > > > When coresight device is in an active session, driver module of
> > > > > that device should not be removed. Use try_get_module() in
> > > > > coresight_grab_device() to prevent module to be unloaded.
> > > > 
> > > > Are you sure this works?  Why is it needed at all?  Why not just
> tear
> > > > down the children properly when a module is removed so that you
> don't
> > > > need this at all?
> > > 
> > > Using the terms parent and child is somewhat ambiguous...  This is not
> a
> > > parent-child relationship but simply an association between devices,
> something
> > > like port 1 on device "parent" is connected to port 2 on device
> "child".  The
> > > parent-child nomenclature was chosen to reflect that a device appears
> before
> > > another in a coresight path.  Otherwise there is no other relation
> between
> > > devices, hence the choice of using try_get_module()/put_module() to
> prevent
> > > drivers from being taken away.  I'd be happy to proceed differently
> but haven't
> > > found better options.
> > > 
> > > Going back to parent/child, we could have chosen left/right, up/down
> or A/B, all
> > > of which are just as confusion. 
> > 
> > Ok, thanks.
> > 
> > But this causes confusion for everyone as seen below:
> > 
> > > > > Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> > > > > ---
> > > > >  drivers/hwtracing/coresight/coresight.c | 27
> +++++++++++++++++++++----
> > > > >  1 file changed, 23 insertions(+), 4 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/hwtracing/coresight/coresight.c
> b/drivers/hwtracing/coresight/coresight.c
> > > > > index b7151c5f81b1..17bc76ea86ae 100644
> > > > > --- a/drivers/hwtracing/coresight/coresight.c
> > > > > +++ b/drivers/hwtracing/coresight/coresight.c
> > > > > @@ -640,7 +640,7 @@ struct coresight_device
> *coresight_get_sink_by_id(u32 id)
> > > > >   * don't appear on the trace path, they should be handled along
> with the
> > > > >   * the master device.
> > > > >   */
> > > > > -static void coresight_grab_device(struct coresight_device *csdev)
> > > > > +static int coresight_grab_device(struct coresight_device *csdev)
> > > > >  {
> > > > >  	int i;
> > > > >  
> > > > > @@ -648,10 +648,25 @@ static void coresight_grab_device(struct
> coresight_device *csdev)
> > > > >  		struct coresight_device *child;
> > > > >  
> > > > >  		child  = csdev->pdata->conns[i].child_dev;
> > > > > -		if (child && child->type ==
> CORESIGHT_DEV_TYPE_HELPER)
> > > > > +		if (child && child->type ==
> CORESIGHT_DEV_TYPE_HELPER) {
> > > > > +			if
> (!try_module_get(child->dev.parent->driver->owner))
> > > > 
> > > > Why the child's parent?  Why not the child itself?
> > > 
> > > The device structure of each coresight_device is not associated with a
> driver.
> > > It is there to take advantages of device goodies such as dev.type,
> dev.group,
> > > dev.release and dev.bus.  Coresight IP blocks are discovered on the
> AMBA bus and as
> > > such amba_device::dev::driver holds the driver itself.  In
> coresight_register()
> > > the association coresigth::dev::parent = amba_device::dev is made.
> > > 
> > > > 
> > > > 
> > > > > +				goto err;
> > > > 
> > > > What about the error given to you here?  Why throw that away?
> > > > 
> > > > >  			pm_runtime_get_sync(child->dev.parent);
> > > > > +		}
> > > > >  	}
> > > > > +	if (!try_module_get(csdev->dev.parent->driver->owner))
> > > > > +		goto err;
> > > > 
> > > > You don't reduce the child's parent's driver owner module reference
> > > > here?
> > > 
> > > Here @parent is referencing the current device.  Now that helper
> devices
> > > connected to any of its outgoing ports have been enabled (and a
> reference count 
> > > to the helper device driver incremented), a reference count to the
> current device
> > > driver can also be incremented. 
> > 
> > I mean the fact that your error handling does not seem to roll back the
> > module reference count you got up above in the other loop.
> 
> Ah! You were talking about the error condition, while I thought you were
> referring to the normal execution path.  I am in agreement with all your
> comments on error
> handling. 
>
Thanks to point this error out, Greg and Mathieu. I'll check and fix
them in next revision.
 
> > 
> > Or if it does, it's really really not obvious, and should at the very
> > least, be commented as to how it's all cleaning up properly.
> > 
> > thanks,
> > 
> > greg k-h
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Tingwei Zhang July 24, 2020, 7:36 a.m. UTC | #7
On Thu, Jul 23, 2020 at 07:07:07PM +0800, Greg Kroah-Hartman wrote:
> On Thu, Jul 23, 2020 at 12:27:48PM +0800, Tingwei Zhang wrote:
> > When coresight device is in an active session, driver module of
> > that device should not be removed. Use try_get_module() in
> > coresight_grab_device() to prevent module to be unloaded.
> 
> Are you sure this works?  Why is it needed at all?  Why not just tear
> down the children properly when a module is removed so that you don't
> need this at all?
> 
> > 
> > Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> > ---
> >  drivers/hwtracing/coresight/coresight.c | 27 +++++++++++++++++++++----
> >  1 file changed, 23 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/hwtracing/coresight/coresight.c
> b/drivers/hwtracing/coresight/coresight.c
> > index b7151c5f81b1..17bc76ea86ae 100644
> > --- a/drivers/hwtracing/coresight/coresight.c
> > +++ b/drivers/hwtracing/coresight/coresight.c
> > @@ -640,7 +640,7 @@ struct coresight_device
> *coresight_get_sink_by_id(u32 id)
> >   * don't appear on the trace path, they should be handled along with
> the
> >   * the master device.
> >   */
> > -static void coresight_grab_device(struct coresight_device *csdev)
> > +static int coresight_grab_device(struct coresight_device *csdev)
> >  {
> >  	int i;
> >  
> > @@ -648,10 +648,25 @@ static void coresight_grab_device(struct
> coresight_device *csdev)
> >  		struct coresight_device *child;
> >  
> >  		child  = csdev->pdata->conns[i].child_dev;
> > -		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > +		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
> > +			if
> (!try_module_get(child->dev.parent->driver->owner))
> 
> Why the child's parent?  Why not the child itself?
> 
> 
> > +				goto err;
> 
> What about the error given to you here?  Why throw that away?
> 
try_module_get() returns bool which doesn't have error code.
> >  			pm_runtime_get_sync(child->dev.parent);
> > +		}
> >  	}
> > +	if (!try_module_get(csdev->dev.parent->driver->owner))
> > +		goto err;
> 
> You don't reduce the child's parent's driver owner module reference
> here?
> 

I'll fix in next revision.

> Ugh, that's a horid sentence :(
> 
> >  	pm_runtime_get_sync(csdev->dev.parent);
> > +	return 0;
> > +err:
> > +	for (i--; i >= 0; i--) {
> > +		struct coresight_device *child;
> > +
> > +		child  = csdev->pdata->conns[i].child_dev;
> > +		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > +			pm_runtime_put(child->dev.parent);
> > +	}
> > +	return -ENODEV;
> >  }
> >  
> >  /*
> > @@ -663,12 +678,15 @@ static void coresight_drop_device(struct
> coresight_device *csdev)
> >  	int i;
> >  
> >  	pm_runtime_put(csdev->dev.parent);
> > +	module_put(csdev->dev.parent->driver->owner);
> >  	for (i = 0; i < csdev->pdata->nr_outport; i++) {
> >  		struct coresight_device *child;
> >  
> >  		child  = csdev->pdata->conns[i].child_dev;
> > -		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > +		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
> >  			pm_runtime_put(child->dev.parent);
> > +			module_put(child->dev.parent->driver->owner);
> > +		}
> >  	}
> >  }
> >  
> > @@ -721,7 +739,8 @@ static int _coresight_build_path(struct
> coresight_device *csdev,
> >  	if (!node)
> >  		return -ENOMEM;
> >  
> > -	coresight_grab_device(csdev);
> > +	if (coresight_grab_device(csdev))
> > +		return -ENODEV;
> 
> Why not return the error given to you?

Thanks. I will fix it in next revision.

Thanks, Tingwei

> 
> 
> thanks,
> 
> greg k-h
Tingwei Zhang July 25, 2020, 10:05 a.m. UTC | #8
Hi Mike,

 Thu, Jul 23, 2020 at 06:55:47PM +0800, Mike Leach wrote:
> Hi Tingwei,
> 
> On Thu, 23 Jul 2020 at 05:28, Tingwei Zhang <tingwei@codeaurora.org>
> wrote:
> >
> > When coresight device is in an active session, driver module of
> > that device should not be removed. Use try_get_module() in
> > coresight_grab_device() to prevent module to be unloaded.
> >
> > Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> > ---
> >  drivers/hwtracing/coresight/coresight.c | 27 +++++++++++++++++++++----
> >  1 file changed, 23 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/hwtracing/coresight/coresight.c
> b/drivers/hwtracing/coresight/coresight.c
> > index b7151c5f81b1..17bc76ea86ae 100644
> > --- a/drivers/hwtracing/coresight/coresight.c
> > +++ b/drivers/hwtracing/coresight/coresight.c
> > @@ -640,7 +640,7 @@ struct coresight_device
> *coresight_get_sink_by_id(u32 id)
> >   * don't appear on the trace path, they should be handled along with
> the
> >   * the master device.
> >   */
> > -static void coresight_grab_device(struct coresight_device *csdev)
> > +static int coresight_grab_device(struct coresight_device *csdev)
> >  {
> >         int i;
> >
> > @@ -648,10 +648,25 @@ static void coresight_grab_device(struct
> coresight_device *csdev)
> >                 struct coresight_device *child;
> >
> >                 child  = csdev->pdata->conns[i].child_dev;
> > -               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > +               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
> > +                       if
> (!try_module_get(child->dev.parent->driver->owner))
> > +                               goto err;
> >                         pm_runtime_get_sync(child->dev.parent);
> > +               }
> >         }
> > +       if (!try_module_get(csdev->dev.parent->driver->owner))
> > +               goto err;
> >         pm_runtime_get_sync(csdev->dev.parent);
> > +       return 0;
> > +err:
> > +       for (i--; i >= 0; i--) {
> > +               struct coresight_device *child;
> > +
> > +               child  = csdev->pdata->conns[i].child_dev;
> > +               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > +                       pm_runtime_put(child->dev.parent);
> > +       }
> > +       return -ENODEV;
> >  }
> >
> >  /*
> > @@ -663,12 +678,15 @@ static void coresight_drop_device(struct
> coresight_device *csdev)
> >         int i;
> >
> >         pm_runtime_put(csdev->dev.parent);
> > +       module_put(csdev->dev.parent->driver->owner);
> >         for (i = 0; i < csdev->pdata->nr_outport; i++) {
> >                 struct coresight_device *child;
> >
> >                 child  = csdev->pdata->conns[i].child_dev;
> > -               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > +               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
> >                         pm_runtime_put(child->dev.parent);
> > +                       module_put(child->dev.parent->driver->owner);
> > +               }
> >         }
> >  }
> >
> > @@ -721,7 +739,8 @@ static int _coresight_build_path(struct
> coresight_device *csdev,
> >         if (!node)
> >                 return -ENOMEM;
> >
> > -       coresight_grab_device(csdev);
> > +       if (coresight_grab_device(csdev))
> > +               return -ENODEV;
> >         node->csdev = csdev;
> >         list_add(&node->link, path);
> >
> > --
> > The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
> Forum,
> > a Linux Foundation Collaborative Project
> >
> 
> The CTI devices are associated with other coresight components using
> csdev->ect_dev; These are not handled in the main linear trace path as
> these have a star topology interlinking all components.
> However, when a component has an associated ect_dev then is enabled at
> the same time as the other component is enabled.
> 
> So a module_get/put will be needed for this association to prevent the
> CTI being removed while a trace session is active.
> I suggest in coresight.c : coresight_control_assoc_ectdev()
> 

In module unload process, devices of that module will be removed. In
this case, cti_remove() is called on all cti device when unloading
cti module. All cti connections is cleaned at that time.
csdev->ect_dev is set to NULL. coresight_control_assoc_ectdev()
will return since since ect_csdev is NULL. 

I think we are safe without holding module reference since cti
driver has done a pretty good job on clean up.

Let me know if you still think we need hold the module reference.

Thanks, Tingwei
> Regards
> 
> Mike
> 
> 
> -- 
> Mike Leach
> Principal Engineer, ARM Ltd.
> Manchester Design Centre. UK
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Mike Leach July 25, 2020, 1:51 p.m. UTC | #9
Hi Tingwei,

On Sat, 25 Jul 2020 at 11:05, Tingwei Zhang <tingweiz@codeaurora.org> wrote:
>
> Hi Mike,
>
>  Thu, Jul 23, 2020 at 06:55:47PM +0800, Mike Leach wrote:
> > Hi Tingwei,
> >
> > On Thu, 23 Jul 2020 at 05:28, Tingwei Zhang <tingwei@codeaurora.org>
> > wrote:
> > >
> > > When coresight device is in an active session, driver module of
> > > that device should not be removed. Use try_get_module() in
> > > coresight_grab_device() to prevent module to be unloaded.
> > >
> > > Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> > > ---
> > >  drivers/hwtracing/coresight/coresight.c | 27 +++++++++++++++++++++----
> > >  1 file changed, 23 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/hwtracing/coresight/coresight.c
> > b/drivers/hwtracing/coresight/coresight.c
> > > index b7151c5f81b1..17bc76ea86ae 100644
> > > --- a/drivers/hwtracing/coresight/coresight.c
> > > +++ b/drivers/hwtracing/coresight/coresight.c
> > > @@ -640,7 +640,7 @@ struct coresight_device
> > *coresight_get_sink_by_id(u32 id)
> > >   * don't appear on the trace path, they should be handled along with
> > the
> > >   * the master device.
> > >   */
> > > -static void coresight_grab_device(struct coresight_device *csdev)
> > > +static int coresight_grab_device(struct coresight_device *csdev)
> > >  {
> > >         int i;
> > >
> > > @@ -648,10 +648,25 @@ static void coresight_grab_device(struct
> > coresight_device *csdev)
> > >                 struct coresight_device *child;
> > >
> > >                 child  = csdev->pdata->conns[i].child_dev;
> > > -               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > > +               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
> > > +                       if
> > (!try_module_get(child->dev.parent->driver->owner))
> > > +                               goto err;
> > >                         pm_runtime_get_sync(child->dev.parent);
> > > +               }
> > >         }
> > > +       if (!try_module_get(csdev->dev.parent->driver->owner))
> > > +               goto err;
> > >         pm_runtime_get_sync(csdev->dev.parent);
> > > +       return 0;
> > > +err:
> > > +       for (i--; i >= 0; i--) {
> > > +               struct coresight_device *child;
> > > +
> > > +               child  = csdev->pdata->conns[i].child_dev;
> > > +               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > > +                       pm_runtime_put(child->dev.parent);
> > > +       }
> > > +       return -ENODEV;
> > >  }
> > >
> > >  /*
> > > @@ -663,12 +678,15 @@ static void coresight_drop_device(struct
> > coresight_device *csdev)
> > >         int i;
> > >
> > >         pm_runtime_put(csdev->dev.parent);
> > > +       module_put(csdev->dev.parent->driver->owner);
> > >         for (i = 0; i < csdev->pdata->nr_outport; i++) {
> > >                 struct coresight_device *child;
> > >
> > >                 child  = csdev->pdata->conns[i].child_dev;
> > > -               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
> > > +               if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
> > >                         pm_runtime_put(child->dev.parent);
> > > +                       module_put(child->dev.parent->driver->owner);
> > > +               }
> > >         }
> > >  }
> > >
> > > @@ -721,7 +739,8 @@ static int _coresight_build_path(struct
> > coresight_device *csdev,
> > >         if (!node)
> > >                 return -ENOMEM;
> > >
> > > -       coresight_grab_device(csdev);
> > > +       if (coresight_grab_device(csdev))
> > > +               return -ENODEV;
> > >         node->csdev = csdev;
> > >         list_add(&node->link, path);
> > >
> > > --
> > > The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
> > Forum,
> > > a Linux Foundation Collaborative Project
> > >
> >
> > The CTI devices are associated with other coresight components using
> > csdev->ect_dev; These are not handled in the main linear trace path as
> > these have a star topology interlinking all components.
> > However, when a component has an associated ect_dev then is enabled at
> > the same time as the other component is enabled.
> >
> > So a module_get/put will be needed for this association to prevent the
> > CTI being removed while a trace session is active.
> > I suggest in coresight.c : coresight_control_assoc_ectdev()
> >
>
> In module unload process, devices of that module will be removed. In
> this case, cti_remove() is called on all cti device when unloading
> cti module. All cti connections is cleaned at that time.
> csdev->ect_dev is set to NULL. coresight_control_assoc_ectdev()
> will return since since ect_csdev is NULL.
>
> I think we are safe without holding module reference since cti
> driver has done a pretty good job on clean up.
>

The issue here is not about clean up. We need to keep all the
programmed coresight modules loaded and operational while a coresight
trace session is in progress. The CTI can be used to control the
generation of trace, trigger trace related events etc.
If the module is removed while a session is in progress then this
programming is lost and the trace data collected may not be what is
expected.

Regards

Mike

> Let me know if you still think we need hold the module reference.
>
> Thanks, Tingwei
> > Regards
> >
> > Mike
> >
> >
> > --
> > Mike Leach
> > Principal Engineer, ARM Ltd.
> > Manchester Design Centre. UK
> >
> > _______________________________________________
> > linux-arm-kernel mailing list
> > linux-arm-kernel@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Tingwei Zhang July 26, 2020, 1:32 a.m. UTC | #10
Hi Mike,
On Sat, Jul 25, 2020 at 09:51:31PM +0800, Mike Leach wrote:
> Hi Tingwei,
> 
> On Sat, 25 Jul 2020 at 11:05, Tingwei Zhang <tingweiz@codeaurora.org>
> wrote:
> >
> > Hi Mike,
> >
> >  Thu, Jul 23, 2020 at 06:55:47PM +0800, Mike Leach wrote:
> > > Hi Tingwei,
> > >
> > > On Thu, 23 Jul 2020 at 05:28, Tingwei Zhang <tingwei@codeaurora.org>
> > > wrote:
> > > >
> > > > When coresight device is in an active session, driver module of
> > > > that device should not be removed. Use try_get_module() in
> > > > coresight_grab_device() to prevent module to be unloaded.
> > > >
> > > > Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> > > > ---
> > > >  drivers/hwtracing/coresight/coresight.c | 27
> +++++++++++++++++++++----
> > > >  1 file changed, 23 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/drivers/hwtracing/coresight/coresight.c
> > > b/drivers/hwtracing/coresight/coresight.c
> > > > index b7151c5f81b1..17bc76ea86ae 100644
> > > > --- a/drivers/hwtracing/coresight/coresight.c
> > > > +++ b/drivers/hwtracing/coresight/coresight.c
> > > > @@ -640,7 +640,7 @@ struct coresight_device
> > > *coresight_get_sink_by_id(u32 id)
> > > >   * don't appear on the trace path, they should be handled along
> with
> > > the
> > > >   * the master device.
> > > >   */
> > > > -static void coresight_grab_device(struct coresight_device *csdev)
> > > > +static int coresight_grab_device(struct coresight_device *csdev)
> > > >  {
> > > >         int i;
> > > >
> > > > @@ -648,10 +648,25 @@ static void coresight_grab_device(struct
> > > coresight_device *csdev)
> > > >                 struct coresight_device *child;
> > > >
> > > >                 child  = csdev->pdata->conns[i].child_dev;
> > > > -               if (child && child->type ==
> CORESIGHT_DEV_TYPE_HELPER)
> > > > +               if (child && child->type ==
> CORESIGHT_DEV_TYPE_HELPER) {
> > > > +                       if
> > > (!try_module_get(child->dev.parent->driver->owner))
> > > > +                               goto err;
> > > >                         pm_runtime_get_sync(child->dev.parent);
> > > > +               }
> > > >         }
> > > > +       if (!try_module_get(csdev->dev.parent->driver->owner))
> > > > +               goto err;
> > > >         pm_runtime_get_sync(csdev->dev.parent);
> > > > +       return 0;
> > > > +err:
> > > > +       for (i--; i >= 0; i--) {
> > > > +               struct coresight_device *child;
> > > > +
> > > > +               child  = csdev->pdata->conns[i].child_dev;
> > > > +               if (child && child->type ==
> CORESIGHT_DEV_TYPE_HELPER)
> > > > +                       pm_runtime_put(child->dev.parent);
> > > > +       }
> > > > +       return -ENODEV;
> > > >  }
> > > >
> > > >  /*
> > > > @@ -663,12 +678,15 @@ static void coresight_drop_device(struct
> > > coresight_device *csdev)
> > > >         int i;
> > > >
> > > >         pm_runtime_put(csdev->dev.parent);
> > > > +       module_put(csdev->dev.parent->driver->owner);
> > > >         for (i = 0; i < csdev->pdata->nr_outport; i++) {
> > > >                 struct coresight_device *child;
> > > >
> > > >                 child  = csdev->pdata->conns[i].child_dev;
> > > > -               if (child && child->type ==
> CORESIGHT_DEV_TYPE_HELPER)
> > > > +               if (child && child->type ==
> CORESIGHT_DEV_TYPE_HELPER) {
> > > >                         pm_runtime_put(child->dev.parent);
> > > > +
> module_put(child->dev.parent->driver->owner);
> > > > +               }
> > > >         }
> > > >  }
> > > >
> > > > @@ -721,7 +739,8 @@ static int _coresight_build_path(struct
> > > coresight_device *csdev,
> > > >         if (!node)
> > > >                 return -ENOMEM;
> > > >
> > > > -       coresight_grab_device(csdev);
> > > > +       if (coresight_grab_device(csdev))
> > > > +               return -ENODEV;
> > > >         node->csdev = csdev;
> > > >         list_add(&node->link, path);
> > > >
> > > > --
> > > > The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
> > > Forum,
> > > > a Linux Foundation Collaborative Project
> > > >
> > >
> > > The CTI devices are associated with other coresight components using
> > > csdev->ect_dev; These are not handled in the main linear trace path as
> > > these have a star topology interlinking all components.
> > > However, when a component has an associated ect_dev then is enabled at
> > > the same time as the other component is enabled.
> > >
> > > So a module_get/put will be needed for this association to prevent the
> > > CTI being removed while a trace session is active.
> > > I suggest in coresight.c : coresight_control_assoc_ectdev()
> > >
> >
> > In module unload process, devices of that module will be removed. In
> > this case, cti_remove() is called on all cti device when unloading
> > cti module. All cti connections is cleaned at that time.
> > csdev->ect_dev is set to NULL. coresight_control_assoc_ectdev()
> > will return since since ect_csdev is NULL.
> >
> > I think we are safe without holding module reference since cti
> > driver has done a pretty good job on clean up.
> >
> 
> The issue here is not about clean up. We need to keep all the
> programmed coresight modules loaded and operational while a coresight
> trace session is in progress. The CTI can be used to control the
> generation of trace, trigger trace related events etc.
> If the module is removed while a session is in progress then this
> programming is lost and the trace data collected may not be what is
> expected.
>

Got your point now. In my opinion, CTI is kinda different to other
coresight components since it's optional.

For other components, they have to been there when path is built or
the enablement fails. Use can either get a successfully return which
means it's good and all devices/modules are there until path is
released, or a failed return which means trace session can't be
setup.

In CTI case, there's no garrantee that CTI related to the trace
session is there even the enablement is successful. For example, 
One cti is required for ETM tracing session. If cti module is
unloaded before enable that trace session. The enablement will
still be successfully done since there's no ect_dev connected
at all.

My point is holding cti module reference in enable path won't
fix all the problem. Altenatively, user should be aware that
unload cti module leads to all cti functioniol failure.
With current design, the behavior is consistant on cti. User
can unload cti module whenever he wants but it will break
the function.

Thanks, Tingwei
 
> Regards
> 
> Mike
> 
> > Let me know if you still think we need hold the module reference.
> >
> > Thanks, Tingwei
> > > Regards
> > >
> > > Mike
> > >
> > >
> > > --
> > > Mike Leach
> > > Principal Engineer, ARM Ltd.
> > > Manchester Design Centre. UK
> > >
> > > _______________________________________________
> > > linux-arm-kernel mailing list
> > > linux-arm-kernel@lists.infradead.org
> > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 
> 
> 
> -- 
> Mike Leach
> Principal Engineer, ARM Ltd.
> Manchester Design Centre. UK
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Mike Leach July 27, 2020, 5:04 p.m. UTC | #11
Hi Tingwei,

On Sun, 26 Jul 2020 at 02:32, Tingwei Zhang <tingweiz@codeaurora.org> wrote:
>
> Hi Mike,
> On Sat, Jul 25, 2020 at 09:51:31PM +0800, Mike Leach wrote:
> > Hi Tingwei,
> >
> > On Sat, 25 Jul 2020 at 11:05, Tingwei Zhang <tingweiz@codeaurora.org>
> > wrote:
> > >
> > > Hi Mike,
> > >
> > >  Thu, Jul 23, 2020 at 06:55:47PM +0800, Mike Leach wrote:
> > > > Hi Tingwei,
> > > >
> > > > On Thu, 23 Jul 2020 at 05:28, Tingwei Zhang <tingwei@codeaurora.org>
> > > > wrote:
> > > > >
> > > > > When coresight device is in an active session, driver module of
> > > > > that device should not be removed. Use try_get_module() in
> > > > > coresight_grab_device() to prevent module to be unloaded.
> > > > >
> > > > > Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> > > > > ---
> > > > >  drivers/hwtracing/coresight/coresight.c | 27
> > +++++++++++++++++++++----
> > > > >  1 file changed, 23 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/drivers/hwtracing/coresight/coresight.c
> > > > b/drivers/hwtracing/coresight/coresight.c
> > > > > index b7151c5f81b1..17bc76ea86ae 100644
> > > > > --- a/drivers/hwtracing/coresight/coresight.c
> > > > > +++ b/drivers/hwtracing/coresight/coresight.c
> > > > > @@ -640,7 +640,7 @@ struct coresight_device
> > > > *coresight_get_sink_by_id(u32 id)
> > > > >   * don't appear on the trace path, they should be handled along
> > with
> > > > the
> > > > >   * the master device.
> > > > >   */
> > > > > -static void coresight_grab_device(struct coresight_device *csdev)
> > > > > +static int coresight_grab_device(struct coresight_device *csdev)
> > > > >  {
> > > > >         int i;
> > > > >
> > > > > @@ -648,10 +648,25 @@ static void coresight_grab_device(struct
> > > > coresight_device *csdev)
> > > > >                 struct coresight_device *child;
> > > > >
> > > > >                 child  = csdev->pdata->conns[i].child_dev;
> > > > > -               if (child && child->type ==
> > CORESIGHT_DEV_TYPE_HELPER)
> > > > > +               if (child && child->type ==
> > CORESIGHT_DEV_TYPE_HELPER) {
> > > > > +                       if
> > > > (!try_module_get(child->dev.parent->driver->owner))
> > > > > +                               goto err;
> > > > >                         pm_runtime_get_sync(child->dev.parent);
> > > > > +               }
> > > > >         }
> > > > > +       if (!try_module_get(csdev->dev.parent->driver->owner))
> > > > > +               goto err;
> > > > >         pm_runtime_get_sync(csdev->dev.parent);
> > > > > +       return 0;
> > > > > +err:
> > > > > +       for (i--; i >= 0; i--) {
> > > > > +               struct coresight_device *child;
> > > > > +
> > > > > +               child  = csdev->pdata->conns[i].child_dev;
> > > > > +               if (child && child->type ==
> > CORESIGHT_DEV_TYPE_HELPER)
> > > > > +                       pm_runtime_put(child->dev.parent);
> > > > > +       }
> > > > > +       return -ENODEV;
> > > > >  }
> > > > >
> > > > >  /*
> > > > > @@ -663,12 +678,15 @@ static void coresight_drop_device(struct
> > > > coresight_device *csdev)
> > > > >         int i;
> > > > >
> > > > >         pm_runtime_put(csdev->dev.parent);
> > > > > +       module_put(csdev->dev.parent->driver->owner);
> > > > >         for (i = 0; i < csdev->pdata->nr_outport; i++) {
> > > > >                 struct coresight_device *child;
> > > > >
> > > > >                 child  = csdev->pdata->conns[i].child_dev;
> > > > > -               if (child && child->type ==
> > CORESIGHT_DEV_TYPE_HELPER)
> > > > > +               if (child && child->type ==
> > CORESIGHT_DEV_TYPE_HELPER) {
> > > > >                         pm_runtime_put(child->dev.parent);
> > > > > +
> > module_put(child->dev.parent->driver->owner);
> > > > > +               }
> > > > >         }
> > > > >  }
> > > > >
> > > > > @@ -721,7 +739,8 @@ static int _coresight_build_path(struct
> > > > coresight_device *csdev,
> > > > >         if (!node)
> > > > >                 return -ENOMEM;
> > > > >
> > > > > -       coresight_grab_device(csdev);
> > > > > +       if (coresight_grab_device(csdev))
> > > > > +               return -ENODEV;
> > > > >         node->csdev = csdev;
> > > > >         list_add(&node->link, path);
> > > > >
> > > > > --
> > > > > The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
> > > > Forum,
> > > > > a Linux Foundation Collaborative Project
> > > > >
> > > >
> > > > The CTI devices are associated with other coresight components using
> > > > csdev->ect_dev; These are not handled in the main linear trace path as
> > > > these have a star topology interlinking all components.
> > > > However, when a component has an associated ect_dev then is enabled at
> > > > the same time as the other component is enabled.
> > > >
> > > > So a module_get/put will be needed for this association to prevent the
> > > > CTI being removed while a trace session is active.
> > > > I suggest in coresight.c : coresight_control_assoc_ectdev()
> > > >
> > >
> > > In module unload process, devices of that module will be removed. In
> > > this case, cti_remove() is called on all cti device when unloading
> > > cti module. All cti connections is cleaned at that time.
> > > csdev->ect_dev is set to NULL. coresight_control_assoc_ectdev()
> > > will return since since ect_csdev is NULL.
> > >
> > > I think we are safe without holding module reference since cti
> > > driver has done a pretty good job on clean up.
> > >
> >
> > The issue here is not about clean up. We need to keep all the
> > programmed coresight modules loaded and operational while a coresight
> > trace session is in progress. The CTI can be used to control the
> > generation of trace, trigger trace related events etc.
> > If the module is removed while a session is in progress then this
> > programming is lost and the trace data collected may not be what is
> > expected.
> >
>
> Got your point now. In my opinion, CTI is kinda different to other
> coresight components since it's optional.
>
> For other components, they have to been there when path is built or
> the enablement fails. Use can either get a successfully return which
> means it's good and all devices/modules are there until path is
> released, or a failed return which means trace session can't be
> setup.
>

The module get/put do more than ensure that the system can be enabled.
They ensure that no components in the coresight path can be unloaded
until all paths in use are released. This ensures that clients do not
get the trace system pulled out from underneath - corrupting trace
data, and possibly affecting the client program.

> In CTI case, there's no garrantee that CTI related to the trace
> session is there even the enablement is successful.

And there is no guarantee that it is not programmed to perform a key
task in controlling the generation of trace.

The entire programmed coresight system must be protected when in use.
That includes CTI devices. Otherwise the integrity of the captured
trace could be compromised, and indeed there may be no trace captured
if this is relying on CTI triggers.

>For example,
> One cti is required for ETM tracing session. If cti module is
> unloaded before enable that trace session. The enablement will
> still be successfully done since there's no ect_dev connected
> at all.
>
> My point is holding cti module reference in enable path won't
> fix all the problem. Altenatively, user should be aware that
> unload cti module leads to all cti functioniol failure.
> With current design, the behavior is consistant on cti. User
> can unload cti module whenever he wants but it will break
> the function.

Precisely - and this is what should never be allowed - why can the CTI
be used to break the trace system but not other components?

Moving forwards we will see users who simply use pre-programmed
settings from client programs such as perf. It may be that some users
do not have the credentials to add and remove modules - this could be
an admin function where larger systems with multiple sessions are in
use.


Regards

Mike


>
> Thanks, Tingwei
>
> > Regards
> >
> > Mike
> >
> > > Let me know if you still think we need hold the module reference.
> > >
> > > Thanks, Tingwei
> > > > Regards
> > > >
> > > > Mike
> > > >
> > > >
> > > > --
> > > > Mike Leach
> > > > Principal Engineer, ARM Ltd.
> > > > Manchester Design Centre. UK
> > > >
> > > > _______________________________________________
> > > > linux-arm-kernel mailing list
> > > > linux-arm-kernel@lists.infradead.org
> > > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> >
> >
> >
> > --
> > Mike Leach
> > Principal Engineer, ARM Ltd.
> > Manchester Design Centre. UK
> >
> > _______________________________________________
> > linux-arm-kernel mailing list
> > linux-arm-kernel@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Tingwei Zhang July 28, 2020, 1:07 a.m. UTC | #12
Hi Mike,

On Tue, Jul 28, 2020 at 01:04:21AM +0800, Mike Leach wrote:
> Hi Tingwei,
> 
> On Sun, 26 Jul 2020 at 02:32, Tingwei Zhang <tingweiz@codeaurora.org>
> wrote:
> >
> > Hi Mike,
> > On Sat, Jul 25, 2020 at 09:51:31PM +0800, Mike Leach wrote:
> > > Hi Tingwei,
> > >
> > > On Sat, 25 Jul 2020 at 11:05, Tingwei Zhang <tingweiz@codeaurora.org>
> > > wrote:
> > > >
> > > > Hi Mike,
> > > >
> > > >  Thu, Jul 23, 2020 at 06:55:47PM +0800, Mike Leach wrote:
> > > > > Hi Tingwei,
> > > > >
> > > > > On Thu, 23 Jul 2020 at 05:28, Tingwei Zhang
> <tingwei@codeaurora.org>
> > > > > wrote:
> > > > > >
> > > > > > When coresight device is in an active session, driver module of
> > > > > > that device should not be removed. Use try_get_module() in
> > > > > > coresight_grab_device() to prevent module to be unloaded.
> > > > > >
> > > > > > Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> > > > > > ---
> > > > > >  drivers/hwtracing/coresight/coresight.c | 27
> > > +++++++++++++++++++++----
> > > > > >  1 file changed, 23 insertions(+), 4 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/hwtracing/coresight/coresight.c
> > > > > b/drivers/hwtracing/coresight/coresight.c
> > > > > > index b7151c5f81b1..17bc76ea86ae 100644
> > > > > > --- a/drivers/hwtracing/coresight/coresight.c
> > > > > > +++ b/drivers/hwtracing/coresight/coresight.c
> > > > > > @@ -640,7 +640,7 @@ struct coresight_device
> > > > > *coresight_get_sink_by_id(u32 id)
> > > > > >   * don't appear on the trace path, they should be handled along
> > > with
> > > > > the
> > > > > >   * the master device.
> > > > > >   */
> > > > > > -static void coresight_grab_device(struct coresight_device
> *csdev)
> > > > > > +static int coresight_grab_device(struct coresight_device
> *csdev)
> > > > > >  {
> > > > > >         int i;
> > > > > >
> > > > > > @@ -648,10 +648,25 @@ static void coresight_grab_device(struct
> > > > > coresight_device *csdev)
> > > > > >                 struct coresight_device *child;
> > > > > >
> > > > > >                 child  = csdev->pdata->conns[i].child_dev;
> > > > > > -               if (child && child->type ==
> > > CORESIGHT_DEV_TYPE_HELPER)
> > > > > > +               if (child && child->type ==
> > > CORESIGHT_DEV_TYPE_HELPER) {
> > > > > > +                       if
> > > > > (!try_module_get(child->dev.parent->driver->owner))
> > > > > > +                               goto err;
> > > > > >                         pm_runtime_get_sync(child->dev.parent);
> > > > > > +               }
> > > > > >         }
> > > > > > +       if (!try_module_get(csdev->dev.parent->driver->owner))
> > > > > > +               goto err;
> > > > > >         pm_runtime_get_sync(csdev->dev.parent);
> > > > > > +       return 0;
> > > > > > +err:
> > > > > > +       for (i--; i >= 0; i--) {
> > > > > > +               struct coresight_device *child;
> > > > > > +
> > > > > > +               child  = csdev->pdata->conns[i].child_dev;
> > > > > > +               if (child && child->type ==
> > > CORESIGHT_DEV_TYPE_HELPER)
> > > > > > +                       pm_runtime_put(child->dev.parent);
> > > > > > +       }
> > > > > > +       return -ENODEV;
> > > > > >  }
> > > > > >
> > > > > >  /*
> > > > > > @@ -663,12 +678,15 @@ static void coresight_drop_device(struct
> > > > > coresight_device *csdev)
> > > > > >         int i;
> > > > > >
> > > > > >         pm_runtime_put(csdev->dev.parent);
> > > > > > +       module_put(csdev->dev.parent->driver->owner);
> > > > > >         for (i = 0; i < csdev->pdata->nr_outport; i++) {
> > > > > >                 struct coresight_device *child;
> > > > > >
> > > > > >                 child  = csdev->pdata->conns[i].child_dev;
> > > > > > -               if (child && child->type ==
> > > CORESIGHT_DEV_TYPE_HELPER)
> > > > > > +               if (child && child->type ==
> > > CORESIGHT_DEV_TYPE_HELPER) {
> > > > > >                         pm_runtime_put(child->dev.parent);
> > > > > > +
> > > module_put(child->dev.parent->driver->owner);
> > > > > > +               }
> > > > > >         }
> > > > > >  }
> > > > > >
> > > > > > @@ -721,7 +739,8 @@ static int _coresight_build_path(struct
> > > > > coresight_device *csdev,
> > > > > >         if (!node)
> > > > > >                 return -ENOMEM;
> > > > > >
> > > > > > -       coresight_grab_device(csdev);
> > > > > > +       if (coresight_grab_device(csdev))
> > > > > > +               return -ENODEV;
> > > > > >         node->csdev = csdev;
> > > > > >         list_add(&node->link, path);
> > > > > >
> > > > > > --
> > > > > > The Qualcomm Innovation Center, Inc. is a member of the Code
> Aurora
> > > > > Forum,
> > > > > > a Linux Foundation Collaborative Project
> > > > > >
> > > > >
> > > > > The CTI devices are associated with other coresight components
> using
> > > > > csdev->ect_dev; These are not handled in the main linear trace
> path as
> > > > > these have a star topology interlinking all components.
> > > > > However, when a component has an associated ect_dev then is
> enabled at
> > > > > the same time as the other component is enabled.
> > > > >
> > > > > So a module_get/put will be needed for this association to prevent
> the
> > > > > CTI being removed while a trace session is active.
> > > > > I suggest in coresight.c : coresight_control_assoc_ectdev()
> > > > >
> > > >
> > > > In module unload process, devices of that module will be removed. In
> > > > this case, cti_remove() is called on all cti device when unloading
> > > > cti module. All cti connections is cleaned at that time.
> > > > csdev->ect_dev is set to NULL. coresight_control_assoc_ectdev()
> > > > will return since since ect_csdev is NULL.
> > > >
> > > > I think we are safe without holding module reference since cti
> > > > driver has done a pretty good job on clean up.
> > > >
> > >
> > > The issue here is not about clean up. We need to keep all the
> > > programmed coresight modules loaded and operational while a coresight
> > > trace session is in progress. The CTI can be used to control the
> > > generation of trace, trigger trace related events etc.
> > > If the module is removed while a session is in progress then this
> > > programming is lost and the trace data collected may not be what is
> > > expected.
> > >
> >
> > Got your point now. In my opinion, CTI is kinda different to other
> > coresight components since it's optional.
> >
> > For other components, they have to been there when path is built or
> > the enablement fails. Use can either get a successfully return which
> > means it's good and all devices/modules are there until path is
> > released, or a failed return which means trace session can't be
> > setup.
> >
> 
> The module get/put do more than ensure that the system can be enabled.
> They ensure that no components in the coresight path can be unloaded
> until all paths in use are released. This ensures that clients do not
> get the trace system pulled out from underneath - corrupting trace
> data, and possibly affecting the client program.
>

I agree.
 
> > In CTI case, there's no garrantee that CTI related to the trace
> > session is there even the enablement is successful.
> 
> And there is no guarantee that it is not programmed to perform a key
> task in controlling the generation of trace.
> 
> The entire programmed coresight system must be protected when in use.
> That includes CTI devices. Otherwise the integrity of the captured
> trace could be compromised, and indeed there may be no trace captured
> if this is relying on CTI triggers.
> 

That's correct.  If CTI is not there, the trace may be wrong or no
trace as all.

> >For example,
> > One cti is required for ETM tracing session. If cti module is
> > unloaded before enable that trace session. The enablement will
> > still be successfully done since there's no ect_dev connected
> > at all.
> >
> > My point is holding cti module reference in enable path won't
> > fix all the problem. Altenatively, user should be aware that
> > unload cti module leads to all cti functioniol failure.
> > With current design, the behavior is consistant on cti. User
> > can unload cti module whenever he wants but it will break
> > the function.
> 
> Precisely - and this is what should never be allowed - why can the CTI
> be used to break the trace system but not other components?
> 

We can't prevent user to unload CTI module when there's no active
trace session. If we don't allow CTI to be not available, we need to
check whether CTI is there in build_path routine or coresight enable
routine. We need return failure if CTI is not there when user enables
the trace session. Currently, coresight framework check associate CTI
device, but if CTI deivce is not probed due to probe failure or
module unload, coresight framework assume there's no assocaite CTI
device and just continue the enablement.  It won't return failure.

If we have plan to change CTI check to be mandatory, make the coresight
enablement fail if CTI is not there like other trace components.  I'm
perfect fine to add module_get/module_put for CTI.

> Moving forwards we will see users who simply use pre-programmed
> settings from client programs such as perf. It may be that some users
> do not have the credentials to add and remove modules - this could be
> an admin function where larger systems with multiple sessions are in
> use.
> 
> 
> Regards
> 
> Mike
> 
> 
> >
> > Thanks, Tingwei
> >
> > > Regards
> > >
> > > Mike
> > >
> > > > Let me know if you still think we need hold the module reference.
> > > >
> > > > Thanks, Tingwei
> > > > > Regards
> > > > >
> > > > > Mike
> > > > >
> > > > >
> > > > > --
> > > > > Mike Leach
> > > > > Principal Engineer, ARM Ltd.
> > > > > Manchester Design Centre. UK
> > > > >
> > > > > _______________________________________________
> > > > > linux-arm-kernel mailing list
> > > > > linux-arm-kernel@lists.infradead.org
> > > > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> > >
> > >
> > >
> > > --
> > > Mike Leach
> > > Principal Engineer, ARM Ltd.
> > > Manchester Design Centre. UK
> > >
> > > _______________________________________________
> > > linux-arm-kernel mailing list
> > > linux-arm-kernel@lists.infradead.org
> > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 
> 
> 
> -- 
> Mike Leach
> Principal Engineer, ARM Ltd.
> Manchester Design Centre. UK
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Mike Leach July 28, 2020, 10:08 a.m. UTC | #13
Hi Tingwei,

On Tue, 28 Jul 2020 at 02:07, Tingwei Zhang <tingweiz@codeaurora.org> wrote:
>
> Hi Mike,
>
> On Tue, Jul 28, 2020 at 01:04:21AM +0800, Mike Leach wrote:
> > Hi Tingwei,
> >
> > On Sun, 26 Jul 2020 at 02:32, Tingwei Zhang <tingweiz@codeaurora.org>
> > wrote:
> > >
> > > Hi Mike,
> > > On Sat, Jul 25, 2020 at 09:51:31PM +0800, Mike Leach wrote:
> > > > Hi Tingwei,
> > > >
> > > > On Sat, 25 Jul 2020 at 11:05, Tingwei Zhang <tingweiz@codeaurora.org>
> > > > wrote:
> > > > >
> > > > > Hi Mike,
> > > > >
> > > > >  Thu, Jul 23, 2020 at 06:55:47PM +0800, Mike Leach wrote:
> > > > > > Hi Tingwei,
> > > > > >
> > > > > > On Thu, 23 Jul 2020 at 05:28, Tingwei Zhang
> > <tingwei@codeaurora.org>
> > > > > > wrote:
> > > > > > >
> > > > > > > When coresight device is in an active session, driver module of
> > > > > > > that device should not be removed. Use try_get_module() in
> > > > > > > coresight_grab_device() to prevent module to be unloaded.
> > > > > > >
> > > > > > > Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> > > > > > > ---
> > > > > > >  drivers/hwtracing/coresight/coresight.c | 27
> > > > +++++++++++++++++++++----
> > > > > > >  1 file changed, 23 insertions(+), 4 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/hwtracing/coresight/coresight.c
> > > > > > b/drivers/hwtracing/coresight/coresight.c
> > > > > > > index b7151c5f81b1..17bc76ea86ae 100644
> > > > > > > --- a/drivers/hwtracing/coresight/coresight.c
> > > > > > > +++ b/drivers/hwtracing/coresight/coresight.c
> > > > > > > @@ -640,7 +640,7 @@ struct coresight_device
> > > > > > *coresight_get_sink_by_id(u32 id)
> > > > > > >   * don't appear on the trace path, they should be handled along
> > > > with
> > > > > > the
> > > > > > >   * the master device.
> > > > > > >   */
> > > > > > > -static void coresight_grab_device(struct coresight_device
> > *csdev)
> > > > > > > +static int coresight_grab_device(struct coresight_device
> > *csdev)
> > > > > > >  {
> > > > > > >         int i;
> > > > > > >
> > > > > > > @@ -648,10 +648,25 @@ static void coresight_grab_device(struct
> > > > > > coresight_device *csdev)
> > > > > > >                 struct coresight_device *child;
> > > > > > >
> > > > > > >                 child  = csdev->pdata->conns[i].child_dev;
> > > > > > > -               if (child && child->type ==
> > > > CORESIGHT_DEV_TYPE_HELPER)
> > > > > > > +               if (child && child->type ==
> > > > CORESIGHT_DEV_TYPE_HELPER) {
> > > > > > > +                       if
> > > > > > (!try_module_get(child->dev.parent->driver->owner))
> > > > > > > +                               goto err;
> > > > > > >                         pm_runtime_get_sync(child->dev.parent);
> > > > > > > +               }
> > > > > > >         }
> > > > > > > +       if (!try_module_get(csdev->dev.parent->driver->owner))
> > > > > > > +               goto err;
> > > > > > >         pm_runtime_get_sync(csdev->dev.parent);
> > > > > > > +       return 0;
> > > > > > > +err:
> > > > > > > +       for (i--; i >= 0; i--) {
> > > > > > > +               struct coresight_device *child;
> > > > > > > +
> > > > > > > +               child  = csdev->pdata->conns[i].child_dev;
> > > > > > > +               if (child && child->type ==
> > > > CORESIGHT_DEV_TYPE_HELPER)
> > > > > > > +                       pm_runtime_put(child->dev.parent);
> > > > > > > +       }
> > > > > > > +       return -ENODEV;
> > > > > > >  }
> > > > > > >
> > > > > > >  /*
> > > > > > > @@ -663,12 +678,15 @@ static void coresight_drop_device(struct
> > > > > > coresight_device *csdev)
> > > > > > >         int i;
> > > > > > >
> > > > > > >         pm_runtime_put(csdev->dev.parent);
> > > > > > > +       module_put(csdev->dev.parent->driver->owner);
> > > > > > >         for (i = 0; i < csdev->pdata->nr_outport; i++) {
> > > > > > >                 struct coresight_device *child;
> > > > > > >
> > > > > > >                 child  = csdev->pdata->conns[i].child_dev;
> > > > > > > -               if (child && child->type ==
> > > > CORESIGHT_DEV_TYPE_HELPER)
> > > > > > > +               if (child && child->type ==
> > > > CORESIGHT_DEV_TYPE_HELPER) {
> > > > > > >                         pm_runtime_put(child->dev.parent);
> > > > > > > +
> > > > module_put(child->dev.parent->driver->owner);
> > > > > > > +               }
> > > > > > >         }
> > > > > > >  }
> > > > > > >
> > > > > > > @@ -721,7 +739,8 @@ static int _coresight_build_path(struct
> > > > > > coresight_device *csdev,
> > > > > > >         if (!node)
> > > > > > >                 return -ENOMEM;
> > > > > > >
> > > > > > > -       coresight_grab_device(csdev);
> > > > > > > +       if (coresight_grab_device(csdev))
> > > > > > > +               return -ENODEV;
> > > > > > >         node->csdev = csdev;
> > > > > > >         list_add(&node->link, path);
> > > > > > >
> > > > > > > --
> > > > > > > The Qualcomm Innovation Center, Inc. is a member of the Code
> > Aurora
> > > > > > Forum,
> > > > > > > a Linux Foundation Collaborative Project
> > > > > > >
> > > > > >
> > > > > > The CTI devices are associated with other coresight components
> > using
> > > > > > csdev->ect_dev; These are not handled in the main linear trace
> > path as
> > > > > > these have a star topology interlinking all components.
> > > > > > However, when a component has an associated ect_dev then is
> > enabled at
> > > > > > the same time as the other component is enabled.
> > > > > >
> > > > > > So a module_get/put will be needed for this association to prevent
> > the
> > > > > > CTI being removed while a trace session is active.
> > > > > > I suggest in coresight.c : coresight_control_assoc_ectdev()
> > > > > >
> > > > >
> > > > > In module unload process, devices of that module will be removed. In
> > > > > this case, cti_remove() is called on all cti device when unloading
> > > > > cti module. All cti connections is cleaned at that time.
> > > > > csdev->ect_dev is set to NULL. coresight_control_assoc_ectdev()
> > > > > will return since since ect_csdev is NULL.
> > > > >
> > > > > I think we are safe without holding module reference since cti
> > > > > driver has done a pretty good job on clean up.
> > > > >
> > > >
> > > > The issue here is not about clean up. We need to keep all the
> > > > programmed coresight modules loaded and operational while a coresight
> > > > trace session is in progress. The CTI can be used to control the
> > > > generation of trace, trigger trace related events etc.
> > > > If the module is removed while a session is in progress then this
> > > > programming is lost and the trace data collected may not be what is
> > > > expected.
> > > >
> > >
> > > Got your point now. In my opinion, CTI is kinda different to other
> > > coresight components since it's optional.
> > >
> > > For other components, they have to been there when path is built or
> > > the enablement fails. Use can either get a successfully return which
> > > means it's good and all devices/modules are there until path is
> > > released, or a failed return which means trace session can't be
> > > setup.
> > >
> >
> > The module get/put do more than ensure that the system can be enabled.
> > They ensure that no components in the coresight path can be unloaded
> > until all paths in use are released. This ensures that clients do not
> > get the trace system pulled out from underneath - corrupting trace
> > data, and possibly affecting the client program.
> >
>
> I agree.
>
> > > In CTI case, there's no garrantee that CTI related to the trace
> > > session is there even the enablement is successful.
> >
> > And there is no guarantee that it is not programmed to perform a key
> > task in controlling the generation of trace.
> >
> > The entire programmed coresight system must be protected when in use.
> > That includes CTI devices. Otherwise the integrity of the captured
> > trace could be compromised, and indeed there may be no trace captured
> > if this is relying on CTI triggers.
> >
>
> That's correct.  If CTI is not there, the trace may be wrong or no
> trace as all.
>
> > >For example,
> > > One cti is required for ETM tracing session. If cti module is
> > > unloaded before enable that trace session. The enablement will
> > > still be successfully done since there's no ect_dev connected
> > > at all.
> > >
> > > My point is holding cti module reference in enable path won't
> > > fix all the problem. Altenatively, user should be aware that
> > > unload cti module leads to all cti functioniol failure.
> > > With current design, the behavior is consistant on cti. User
> > > can unload cti module whenever he wants but it will break
> > > the function.
> >
> > Precisely - and this is what should never be allowed - why can the CTI
> > be used to break the trace system but not other components?
> >
>

Sorry - I may not have been clear here - I do not want to make having
a CTI mandatory for enabling the trace session - this is an optional
component.

> We can't prevent user to unload CTI module when there's no active
> trace session.

Agreed - the same applies to all the other components.

> If we don't allow CTI to be not available, we need to
> check whether CTI is there in build_path routine or coresight enable
> routine. We need return failure if CTI is not there when user enables
> the trace session. Currently, coresight framework check associate CTI
> device, but if CTI deivce is not probed due to probe failure or
> module unload, coresight framework assume there's no assocaite CTI
> device and just continue the enablement.  It won't return failure.
>
> If we have plan to change CTI check to be mandatory, make the coresight
> enablement fail if CTI is not there like other trace components.  I'm
> perfect fine to add module_get/module_put for CTI.
>

If the CTI is present at the time the trace path is created, then it
must be enabled, and a failure to enable the CTI fails the entire path
enable - this is the operation that is implemented by
coresight_control_assoc_ectdev() at this time.
If a CTI is not present at the time the trace path is created, then as
you have said previously, the ect_dev pointer will be NULL and the
check and enable of CTI will be skipped by
coresight_control_assoc_ectdev() and have no effect on the trace path.

For the module version, if CTI is present and enabled then the module
must be locked so it cannot be removed while the trace is active -
this is the only change that is required. So adding get / put module
in coresight_control_assoc_ectdev() will lock the module in for the
duration that trace is active.

I've tested the code below to ensure that the cti module is correctly
locked while trace is active, but can be removed as required when
trace is inactive.

Regards

Mike

---------------------------------------------------------------------------------
static int
coresight_control_assoc_ectdev(struct coresight_device *csdev, bool enable)
{
    int ect_ret = 0;
    struct coresight_device *ect_csdev = csdev->ect_dev;
    struct module *mod;

    if (!ect_csdev)
        return 0;

    if ((!ect_ops(ect_csdev)->enable) || (!ect_ops(ect_csdev)->disable))
        return 0;

    mod = ect_csdev->dev.parent->driver->owner;

    if (enable) {
        if (try_module_get(mod)) {
            ect_ret = ect_ops(ect_csdev)->enable(ect_csdev);
            if (ect_ret)
                module_put(mod);
        } else
            ect_ret = -ENODEV;
    } else {
        module_put(mod);
        ect_ret = ect_ops(ect_csdev)->disable(ect_csdev);
    }

    /* output warning if ECT enable is preventing trace operation */
    if (ect_ret)
        dev_info(&csdev->dev, "Associated ECT device (%s) %s failed\n",
             dev_name(&ect_csdev->dev),
             enable ? "enable" : "disable");
    return ect_ret;
}
-----------------------------------------------------------------------------------------
Tests:-
-----------------------
root@linaro-developer:/home/linaro/scripts# ./trace-sysfs-start.bash
Tracing via sysfs
enabling etf
wait before enable etm
enabling etm 0.
enabling etm 1.
enabling etm 2.
enabling etm 3.
waiting for trace
root@linaro-developer:/home/linaro/scripts# rmmod ../cs-mods/coresight-etm4x.ko
rmmod: ERROR: Module coresight_etm4x is in use
root@linaro-developer:/home/linaro/scripts# rmmod ../cs-mods/coresight-cti.ko
rmmod: ERROR: Module coresight_cti is in use
root@linaro-developer:/home/linaro/scripts# ./trace-sysfs-end.bash
ending trace
disabling etm 0.
disabling etm 1.
disabling etm 2.
disabling etm 3.
disabling etf
16+0 records in
16+0 records out
8192 bytes (8.2 kB) copied, 0.000834116 s, 9.8 MB/s
root@linaro-developer:/home/linaro/scripts# rmmod ../cs-mods/coresight-cti.ko
root@linaro-developer:/home/linaro/scripts# rmmod ../cs-mods/coresight-etm4x.ko
root@linaro-developer:/home/linaro/scripts# insmod ../cs-mods/coresight-etm4x.ko
root@linaro-developer:/home/linaro/scripts# ./trace-sysfs-start.bash
Tracing via sysfs
enabling etf
wait before enable etm
enabling etm 0.
enabling etm 1.
enabling etm 2.
enabling etm 3.
waiting for trace
root@linaro-developer:/home/linaro/scripts# rmmod ../cs-mods/coresight-etm4x.ko
rmmod: ERROR: Module coresight_etm4x is in use
root@linaro-developer:/home/linaro/scripts# rmmod ../cs-mods/coresight-cti.ko
rmmod: ERROR: Module coresight_cti is not currently loaded
root@linaro-developer:/home/linaro/scripts# ./trace-sysfs-end.bash
ending trace
disabling etm 0.
disabling etm 1.
disabling etm 2.
disabling etm 3.
disabling etf
16+0 records in
16+0 records out
8192 bytes (8.2 kB) copied, 0.000819117 s, 10.0 MB/s

}



> > Moving forwards we will see users who simply use pre-programmed
> > settings from client programs such as perf. It may be that some users
> > do not have the credentials to add and remove modules - this could be
> > an admin function where larger systems with multiple sessions are in
> > use.
> >
> >
> > Regards
> >
> > Mike
> >
> >
> > >
> > > Thanks, Tingwei
> > >
> > > > Regards
> > > >
> > > > Mike
> > > >
> > > > > Let me know if you still think we need hold the module reference.
> > > > >
> > > > > Thanks, Tingwei
> > > > > > Regards
> > > > > >
> > > > > > Mike
> > > > > >
> > > > > >
> > > > > > --
> > > > > > Mike Leach
> > > > > > Principal Engineer, ARM Ltd.
> > > > > > Manchester Design Centre. UK
> > > > > >
> > > > > > _______________________________________________
> > > > > > linux-arm-kernel mailing list
> > > > > > linux-arm-kernel@lists.infradead.org
> > > > > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> > > >
> > > >
> > > >
> > > > --
> > > > Mike Leach
> > > > Principal Engineer, ARM Ltd.
> > > > Manchester Design Centre. UK
> > > >
> > > > _______________________________________________
> > > > linux-arm-kernel mailing list
> > > > linux-arm-kernel@lists.infradead.org
> > > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> >
> >
> >
> > --
> > Mike Leach
> > Principal Engineer, ARM Ltd.
> > Manchester Design Centre. UK
> >
> > _______________________________________________
> > linux-arm-kernel mailing list
> > linux-arm-kernel@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel



--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
Tingwei Zhang July 28, 2020, 11:17 a.m. UTC | #14
Hi Mike,
On Tue, Jul 28, 2020 at 06:08:37PM +0800, Mike Leach wrote:
> Hi Tingwei,
> 
> On Tue, 28 Jul 2020 at 02:07, Tingwei Zhang <tingweiz@codeaurora.org> wrote:
> >
> > Hi Mike,
> >
> > On Tue, Jul 28, 2020 at 01:04:21AM +0800, Mike Leach wrote:
> > > Hi Tingwei,
> > >
> > > On Sun, 26 Jul 2020 at 02:32, Tingwei Zhang <tingweiz@codeaurora.org>
> > > wrote:
> > > >
> > > > Hi Mike,
> > > > On Sat, Jul 25, 2020 at 09:51:31PM +0800, Mike Leach wrote:
> > > > > Hi Tingwei,
> > > > >
> > > > > On Sat, 25 Jul 2020 at 11:05, Tingwei Zhang 
> > > > > <tingweiz@codeaurora.org>
> > > > > wrote:
> > > > > >
> > > > > > Hi Mike,
> > > > > >
> > > > > >  Thu, Jul 23, 2020 at 06:55:47PM +0800, Mike Leach wrote:
> > > > > > > Hi Tingwei,
> > > > > > >
> > > > > > > On Thu, 23 Jul 2020 at 05:28, Tingwei Zhang
> > > <tingwei@codeaurora.org>
> > > > > > > wrote:
> > > > > > > >
> > > > > > > > When coresight device is in an active session, driver module 
> > > > > > > > of
> > > > > > > > that device should not be removed. Use try_get_module() in
> > > > > > > > coresight_grab_device() to prevent module to be unloaded.
> > > > > > > >
> > > > > > > > Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> > > > > > > > ---
> > > > > > > >  drivers/hwtracing/coresight/coresight.c | 27
> > > > > +++++++++++++++++++++----
> > > > > > > >  1 file changed, 23 insertions(+), 4 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/hwtracing/coresight/coresight.c
> > > > > > > b/drivers/hwtracing/coresight/coresight.c
> > > > > > > > index b7151c5f81b1..17bc76ea86ae 100644
> > > > > > > > --- a/drivers/hwtracing/coresight/coresight.c
> > > > > > > > +++ b/drivers/hwtracing/coresight/coresight.c
> > > > > > > > @@ -640,7 +640,7 @@ struct coresight_device
> > > > > > > *coresight_get_sink_by_id(u32 id)
> > > > > > > >   * don't appear on the trace path, they should be handled 
> > > > > > > > along
> > > > > with
> > > > > > > the
> > > > > > > >   * the master device.
> > > > > > > >   */
> > > > > > > > -static void coresight_grab_device(struct coresight_device
> > > *csdev)
> > > > > > > > +static int coresight_grab_device(struct coresight_device
> > > *csdev)
> > > > > > > >  {
> > > > > > > >         int i;
> > > > > > > >
> > > > > > > > @@ -648,10 +648,25 @@ static void coresight_grab_device(struct
> > > > > > > coresight_device *csdev)
> > > > > > > >                 struct coresight_device *child;
> > > > > > > >
> > > > > > > >                 child  = csdev->pdata->conns[i].child_dev;
> > > > > > > > -               if (child && child->type ==
> > > > > CORESIGHT_DEV_TYPE_HELPER)
> > > > > > > > +               if (child && child->type ==
> > > > > CORESIGHT_DEV_TYPE_HELPER) {
> > > > > > > > +                       if
> > > > > > > (!try_module_get(child->dev.parent->driver->owner))
> > > > > > > > +                               goto err;
> > > > > > > > 
> > > > > > > > pm_runtime_get_sync(child->dev.parent);
> > > > > > > > +               }
> > > > > > > >         }
> > > > > > > > +       if (!try_module_get(csdev->dev.parent->driver->owner))
> > > > > > > > +               goto err;
> > > > > > > >         pm_runtime_get_sync(csdev->dev.parent);
> > > > > > > > +       return 0;
> > > > > > > > +err:
> > > > > > > > +       for (i--; i >= 0; i--) {
> > > > > > > > +               struct coresight_device *child;
> > > > > > > > +
> > > > > > > > +               child  = csdev->pdata->conns[i].child_dev;
> > > > > > > > +               if (child && child->type ==
> > > > > CORESIGHT_DEV_TYPE_HELPER)
> > > > > > > > +                       pm_runtime_put(child->dev.parent);
> > > > > > > > +       }
> > > > > > > > +       return -ENODEV;
> > > > > > > >  }
> > > > > > > >
> > > > > > > >  /*
> > > > > > > > @@ -663,12 +678,15 @@ static void coresight_drop_device(struct
> > > > > > > coresight_device *csdev)
> > > > > > > >         int i;
> > > > > > > >
> > > > > > > >         pm_runtime_put(csdev->dev.parent);
> > > > > > > > +       module_put(csdev->dev.parent->driver->owner);
> > > > > > > >         for (i = 0; i < csdev->pdata->nr_outport; i++) {
> > > > > > > >                 struct coresight_device *child;
> > > > > > > >
> > > > > > > >                 child  = csdev->pdata->conns[i].child_dev;
> > > > > > > > -               if (child && child->type ==
> > > > > CORESIGHT_DEV_TYPE_HELPER)
> > > > > > > > +               if (child && child->type ==
> > > > > CORESIGHT_DEV_TYPE_HELPER) {
> > > > > > > >                         pm_runtime_put(child->dev.parent);
> > > > > > > > +
> > > > > module_put(child->dev.parent->driver->owner);
> > > > > > > > +               }
> > > > > > > >         }
> > > > > > > >  }
> > > > > > > >
> > > > > > > > @@ -721,7 +739,8 @@ static int _coresight_build_path(struct
> > > > > > > coresight_device *csdev,
> > > > > > > >         if (!node)
> > > > > > > >                 return -ENOMEM;
> > > > > > > >
> > > > > > > > -       coresight_grab_device(csdev);
> > > > > > > > +       if (coresight_grab_device(csdev))
> > > > > > > > +               return -ENODEV;
> > > > > > > >         node->csdev = csdev;
> > > > > > > >         list_add(&node->link, path);
> > > > > > > >
> > > > > > > > --
> > > > > > > > The Qualcomm Innovation Center, Inc. is a member of the Code
> > > Aurora
> > > > > > > Forum,
> > > > > > > > a Linux Foundation Collaborative Project
> > > > > > > >
> > > > > > >
> > > > > > > The CTI devices are associated with other coresight components
> > > using
> > > > > > > csdev->ect_dev; These are not handled in the main linear trace
> > > path as
> > > > > > > these have a star topology interlinking all components.
> > > > > > > However, when a component has an associated ect_dev then is
> > > enabled at
> > > > > > > the same time as the other component is enabled.
> > > > > > >
> > > > > > > So a module_get/put will be needed for this association to 
> > > > > > > prevent
> > > the
> > > > > > > CTI being removed while a trace session is active.
> > > > > > > I suggest in coresight.c : coresight_control_assoc_ectdev()
> > > > > > >
> > > > > >
> > > > > > In module unload process, devices of that module will be removed. 
> > > > > > In
> > > > > > this case, cti_remove() is called on all cti device when unloading
> > > > > > cti module. All cti connections is cleaned at that time.
> > > > > > csdev->ect_dev is set to NULL. coresight_control_assoc_ectdev()
> > > > > > will return since since ect_csdev is NULL.
> > > > > >
> > > > > > I think we are safe without holding module reference since cti
> > > > > > driver has done a pretty good job on clean up.
> > > > > >
> > > > >
> > > > > The issue here is not about clean up. We need to keep all the
> > > > > programmed coresight modules loaded and operational while a 
> > > > > coresight
> > > > > trace session is in progress. The CTI can be used to control the
> > > > > generation of trace, trigger trace related events etc.
> > > > > If the module is removed while a session is in progress then this
> > > > > programming is lost and the trace data collected may not be what is
> > > > > expected.
> > > > >
> > > >
> > > > Got your point now. In my opinion, CTI is kinda different to other
> > > > coresight components since it's optional.
> > > >
> > > > For other components, they have to been there when path is built or
> > > > the enablement fails. Use can either get a successfully return which
> > > > means it's good and all devices/modules are there until path is
> > > > released, or a failed return which means trace session can't be
> > > > setup.
> > > >
> > >
> > > The module get/put do more than ensure that the system can be enabled.
> > > They ensure that no components in the coresight path can be unloaded
> > > until all paths in use are released. This ensures that clients do not
> > > get the trace system pulled out from underneath - corrupting trace
> > > data, and possibly affecting the client program.
> > >
> >
> > I agree.
> >
> > > > In CTI case, there's no garrantee that CTI related to the trace
> > > > session is there even the enablement is successful.
> > >
> > > And there is no guarantee that it is not programmed to perform a key
> > > task in controlling the generation of trace.
> > >
> > > The entire programmed coresight system must be protected when in use.
> > > That includes CTI devices. Otherwise the integrity of the captured
> > > trace could be compromised, and indeed there may be no trace captured
> > > if this is relying on CTI triggers.
> > >
> >
> > That's correct.  If CTI is not there, the trace may be wrong or no
> > trace as all.
> >
> > > >For example,
> > > > One cti is required for ETM tracing session. If cti module is
> > > > unloaded before enable that trace session. The enablement will
> > > > still be successfully done since there's no ect_dev connected
> > > > at all.
> > > >
> > > > My point is holding cti module reference in enable path won't
> > > > fix all the problem. Altenatively, user should be aware that
> > > > unload cti module leads to all cti functioniol failure.
> > > > With current design, the behavior is consistant on cti. User
> > > > can unload cti module whenever he wants but it will break
> > > > the function.
> > >
> > > Precisely - and this is what should never be allowed - why can the CTI
> > > be used to break the trace system but not other components?
> > >
> >
> 
> Sorry - I may not have been clear here - I do not want to make having
> a CTI mandatory for enabling the trace session - this is an optional
> component.
>
> > We can't prevent user to unload CTI module when there's no active
> > trace session.
> 
> Agreed - the same applies to all the other components.
> 
> > If we don't allow CTI to be not available, we need to
> > check whether CTI is there in build_path routine or coresight enable
> > routine. We need return failure if CTI is not there when user enables
> > the trace session. Currently, coresight framework check associate CTI
> > device, but if CTI deivce is not probed due to probe failure or
> > module unload, coresight framework assume there's no assocaite CTI
> > device and just continue the enablement.  It won't return failure.
> >
> > If we have plan to change CTI check to be mandatory, make the coresight
> > enablement fail if CTI is not there like other trace components.  I'm
> > perfect fine to add module_get/module_put for CTI.
> >
> 
> If the CTI is present at the time the trace path is created, then it
> must be enabled, and a failure to enable the CTI fails the entire path
> enable - this is the operation that is implemented by
> coresight_control_assoc_ectdev() at this time.
> If a CTI is not present at the time the trace path is created, then as
> you have said previously, the ect_dev pointer will be NULL and the
> check and enable of CTI will be skipped by
> coresight_control_assoc_ectdev() and have no effect on the trace path.
> 
> For the module version, if CTI is present and enabled then the module
> must be locked so it cannot be removed while the trace is active -
> this is the only change that is required. So adding get / put module
> in coresight_control_assoc_ectdev() will lock the module in for the
> duration that trace is active.
> 
> I've tested the code below to ensure that the cti module is correctly
> locked while trace is active, but can be removed as required when
> trace is inactive.
>

Thanks a lot for coming up with the patch and testing it, Mike. 
 
> Regards
> 
> Mike
> 
> ---------------------------------------------------------------------------------
> static int
> coresight_control_assoc_ectdev(struct coresight_device *csdev, bool enable)
> {
>     int ect_ret = 0;
>     struct coresight_device *ect_csdev = csdev->ect_dev;
>     struct module *mod;
> 
>     if (!ect_csdev)
>         return 0;
> 
>     if ((!ect_ops(ect_csdev)->enable) || (!ect_ops(ect_csdev)->disable))
>         return 0;
> 
>     mod = ect_csdev->dev.parent->driver->owner;
> 
>     if (enable) {
>         if (try_module_get(mod)) {
>             ect_ret = ect_ops(ect_csdev)->enable(ect_csdev);
>             if (ect_ret)
>                 module_put(mod);
>         } else
>             ect_ret = -ENODEV;
>     } else {
>         module_put(mod);
>         ect_ret = ect_ops(ect_csdev)->disable(ect_csdev);
>     }

If CTI module is removed before enablement of ETM and installed after ETM
enablement and before ETM disablement. We have a unbalanced module
reference number here. I made one test to prove this with above code.

We may need move module_put() after disable(). Return proper error from
disable() to indicate CTI was not enabled before so we can skip
module_put().

Tests:
db845c:/sys/bus/coresight/devices # echo 1 > tmc_etr0/enable_sink
db845c:/sys/bus/coresight/devices # echo 1 > etm1/enable_source
db845c:/sys/bus/coresight/devices # rmmod coresight_cti
rmmod: failed to unload coresight_cti: Try again
1|db845c:/sys/bus/coresight/devices # echo 0 > etm1/enable_source
db845c:/sys/bus/coresight/devices # rmmod coresight_cti
db845c:/sys/bus/coresight/devices # echo 1 > etm1/enable_source
db845c:/sys/bus/coresight/devices # insmod /data/modules/coresight-cti.ko
db845c:/sys/bus/coresight/devices # echo 1 > etm2/enable_source
db845c:/sys/bus/coresight/devices # echo 0 > etm1/enable_source
db845c:/sys/bus/coresight/devices # rmmod coresight_cti
db845c:/sys/bus/coresight/devices # 

Above module removal should fail since etm2 is still active now.

> 
>     /* output warning if ECT enable is preventing trace operation */
>     if (ect_ret)
>         dev_info(&csdev->dev, "Associated ECT device (%s) %s failed\n",
>              dev_name(&ect_csdev->dev),
>              enable ? "enable" : "disable");
>     return ect_ret;
> }
> -----------------------------------------------------------------------------------------
> Tests:-
> -----------------------
> root@linaro-developer:/home/linaro/scripts# ./trace-sysfs-start.bash
> Tracing via sysfs
> enabling etf
> wait before enable etm
> enabling etm 0.
> enabling etm 1.
> enabling etm 2.
> enabling etm 3.
> waiting for trace
> root@linaro-developer:/home/linaro/scripts# rmmod 
> ../cs-mods/coresight-etm4x.ko
> rmmod: ERROR: Module coresight_etm4x is in use
> root@linaro-developer:/home/linaro/scripts# rmmod 
> ../cs-mods/coresight-cti.ko
> rmmod: ERROR: Module coresight_cti is in use
> root@linaro-developer:/home/linaro/scripts# ./trace-sysfs-end.bash
> ending trace
> disabling etm 0.
> disabling etm 1.
> disabling etm 2.
> disabling etm 3.
> disabling etf
> 16+0 records in
> 16+0 records out
> 8192 bytes (8.2 kB) copied, 0.000834116 s, 9.8 MB/s
> root@linaro-developer:/home/linaro/scripts# rmmod 
> ../cs-mods/coresight-cti.ko
> root@linaro-developer:/home/linaro/scripts# rmmod 
> ../cs-mods/coresight-etm4x.ko
> root@linaro-developer:/home/linaro/scripts# insmod 
> ../cs-mods/coresight-etm4x.ko
> root@linaro-developer:/home/linaro/scripts# ./trace-sysfs-start.bash
> Tracing via sysfs
> enabling etf
> wait before enable etm
> enabling etm 0.
> enabling etm 1.
> enabling etm 2.
> enabling etm 3.
> waiting for trace
> root@linaro-developer:/home/linaro/scripts# rmmod 
> ../cs-mods/coresight-etm4x.ko
> rmmod: ERROR: Module coresight_etm4x is in use
> root@linaro-developer:/home/linaro/scripts# rmmod 
> ../cs-mods/coresight-cti.ko
> rmmod: ERROR: Module coresight_cti is not currently loaded
> root@linaro-developer:/home/linaro/scripts# ./trace-sysfs-end.bash
> ending trace
> disabling etm 0.
> disabling etm 1.
> disabling etm 2.
> disabling etm 3.
> disabling etf
> 16+0 records in
> 16+0 records out
> 8192 bytes (8.2 kB) copied, 0.000819117 s, 10.0 MB/s
> 
> }
> 
> 
> 
> > > Moving forwards we will see users who simply use pre-programmed
> > > settings from client programs such as perf. It may be that some users
> > > do not have the credentials to add and remove modules - this could be
> > > an admin function where larger systems with multiple sessions are in
> > > use.
> > >
> > >
> > > Regards
> > >
> > > Mike
> > >
> > >
> > > >
> > > > Thanks, Tingwei
> > > >
> > > > > Regards
> > > > >
> > > > > Mike
> > > > >
> > > > > > Let me know if you still think we need hold the module reference.
> > > > > >
> > > > > > Thanks, Tingwei
> > > > > > > Regards
> > > > > > >
> > > > > > > Mike
> > > > > > >
> > > > > > >
> > > > > > > --
> > > > > > > Mike Leach
> > > > > > > Principal Engineer, ARM Ltd.
> > > > > > > Manchester Design Centre. UK
> > > > > > >
> > > > > > > _______________________________________________
> > > > > > > linux-arm-kernel mailing list
> > > > > > > linux-arm-kernel@lists.infradead.org
> > > > > > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> > > > >
> > > > >
> > > > >
> > > > > --
> > > > > Mike Leach
> > > > > Principal Engineer, ARM Ltd.
> > > > > Manchester Design Centre. UK
> > > > >
> > > > > _______________________________________________
> > > > > linux-arm-kernel mailing list
> > > > > linux-arm-kernel@lists.infradead.org
> > > > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> > >
> > >
> > >
> > > --
> > > Mike Leach
> > > Principal Engineer, ARM Ltd.
> > > Manchester Design Centre. UK
> > >
> > > _______________________________________________
> > > linux-arm-kernel mailing list
> > > linux-arm-kernel@lists.infradead.org
> > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 
> 
> 
> --
> Mike Leach
> Principal Engineer, ARM Ltd.
> Manchester Design Centre. UK
Mike Leach July 28, 2020, 8:25 p.m. UTC | #15
Hi Tingwei,

On Tue, 28 Jul 2020 at 12:17, Tingwei Zhang <tingweiz@codeaurora.org> wrote:
>
> Hi Mike,
> On Tue, Jul 28, 2020 at 06:08:37PM +0800, Mike Leach wrote:
> > Hi Tingwei,
> >
> > On Tue, 28 Jul 2020 at 02:07, Tingwei Zhang <tingweiz@codeaurora.org> wrote:
> > >
> > > Hi Mike,
> > >
> > > On Tue, Jul 28, 2020 at 01:04:21AM +0800, Mike Leach wrote:
> > > > Hi Tingwei,
> > > >
> > > > On Sun, 26 Jul 2020 at 02:32, Tingwei Zhang <tingweiz@codeaurora.org>
> > > > wrote:
> > > > >
> > > > > Hi Mike,
> > > > > On Sat, Jul 25, 2020 at 09:51:31PM +0800, Mike Leach wrote:
> > > > > > Hi Tingwei,
> > > > > >
> > > > > > On Sat, 25 Jul 2020 at 11:05, Tingwei Zhang
> > > > > > <tingweiz@codeaurora.org>
> > > > > > wrote:
> > > > > > >
> > > > > > > Hi Mike,
> > > > > > >
> > > > > > >  Thu, Jul 23, 2020 at 06:55:47PM +0800, Mike Leach wrote:
> > > > > > > > Hi Tingwei,
> > > > > > > >
> > > > > > > > On Thu, 23 Jul 2020 at 05:28, Tingwei Zhang
> > > > <tingwei@codeaurora.org>
> > > > > > > > wrote:
> > > > > > > > >
> > > > > > > > > When coresight device is in an active session, driver module
> > > > > > > > > of
> > > > > > > > > that device should not be removed. Use try_get_module() in
> > > > > > > > > coresight_grab_device() to prevent module to be unloaded.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
> > > > > > > > > ---
> > > > > > > > >  drivers/hwtracing/coresight/coresight.c | 27
> > > > > > +++++++++++++++++++++----
> > > > > > > > >  1 file changed, 23 insertions(+), 4 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/hwtracing/coresight/coresight.c
> > > > > > > > b/drivers/hwtracing/coresight/coresight.c
> > > > > > > > > index b7151c5f81b1..17bc76ea86ae 100644
> > > > > > > > > --- a/drivers/hwtracing/coresight/coresight.c
> > > > > > > > > +++ b/drivers/hwtracing/coresight/coresight.c
> > > > > > > > > @@ -640,7 +640,7 @@ struct coresight_device
> > > > > > > > *coresight_get_sink_by_id(u32 id)
> > > > > > > > >   * don't appear on the trace path, they should be handled
> > > > > > > > > along
> > > > > > with
> > > > > > > > the
> > > > > > > > >   * the master device.
> > > > > > > > >   */
> > > > > > > > > -static void coresight_grab_device(struct coresight_device
> > > > *csdev)
> > > > > > > > > +static int coresight_grab_device(struct coresight_device
> > > > *csdev)
> > > > > > > > >  {
> > > > > > > > >         int i;
> > > > > > > > >
> > > > > > > > > @@ -648,10 +648,25 @@ static void coresight_grab_device(struct
> > > > > > > > coresight_device *csdev)
> > > > > > > > >                 struct coresight_device *child;
> > > > > > > > >
> > > > > > > > >                 child  = csdev->pdata->conns[i].child_dev;
> > > > > > > > > -               if (child && child->type ==
> > > > > > CORESIGHT_DEV_TYPE_HELPER)
> > > > > > > > > +               if (child && child->type ==
> > > > > > CORESIGHT_DEV_TYPE_HELPER) {
> > > > > > > > > +                       if
> > > > > > > > (!try_module_get(child->dev.parent->driver->owner))
> > > > > > > > > +                               goto err;
> > > > > > > > >
> > > > > > > > > pm_runtime_get_sync(child->dev.parent);
> > > > > > > > > +               }
> > > > > > > > >         }
> > > > > > > > > +       if (!try_module_get(csdev->dev.parent->driver->owner))
> > > > > > > > > +               goto err;
> > > > > > > > >         pm_runtime_get_sync(csdev->dev.parent);
> > > > > > > > > +       return 0;
> > > > > > > > > +err:
> > > > > > > > > +       for (i--; i >= 0; i--) {
> > > > > > > > > +               struct coresight_device *child;
> > > > > > > > > +
> > > > > > > > > +               child  = csdev->pdata->conns[i].child_dev;
> > > > > > > > > +               if (child && child->type ==
> > > > > > CORESIGHT_DEV_TYPE_HELPER)
> > > > > > > > > +                       pm_runtime_put(child->dev.parent);
> > > > > > > > > +       }
> > > > > > > > > +       return -ENODEV;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > >  /*
> > > > > > > > > @@ -663,12 +678,15 @@ static void coresight_drop_device(struct
> > > > > > > > coresight_device *csdev)
> > > > > > > > >         int i;
> > > > > > > > >
> > > > > > > > >         pm_runtime_put(csdev->dev.parent);
> > > > > > > > > +       module_put(csdev->dev.parent->driver->owner);
> > > > > > > > >         for (i = 0; i < csdev->pdata->nr_outport; i++) {
> > > > > > > > >                 struct coresight_device *child;
> > > > > > > > >
> > > > > > > > >                 child  = csdev->pdata->conns[i].child_dev;
> > > > > > > > > -               if (child && child->type ==
> > > > > > CORESIGHT_DEV_TYPE_HELPER)
> > > > > > > > > +               if (child && child->type ==
> > > > > > CORESIGHT_DEV_TYPE_HELPER) {
> > > > > > > > >                         pm_runtime_put(child->dev.parent);
> > > > > > > > > +
> > > > > > module_put(child->dev.parent->driver->owner);
> > > > > > > > > +               }
> > > > > > > > >         }
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > @@ -721,7 +739,8 @@ static int _coresight_build_path(struct
> > > > > > > > coresight_device *csdev,
> > > > > > > > >         if (!node)
> > > > > > > > >                 return -ENOMEM;
> > > > > > > > >
> > > > > > > > > -       coresight_grab_device(csdev);
> > > > > > > > > +       if (coresight_grab_device(csdev))
> > > > > > > > > +               return -ENODEV;
> > > > > > > > >         node->csdev = csdev;
> > > > > > > > >         list_add(&node->link, path);
> > > > > > > > >
> > > > > > > > > --
> > > > > > > > > The Qualcomm Innovation Center, Inc. is a member of the Code
> > > > Aurora
> > > > > > > > Forum,
> > > > > > > > > a Linux Foundation Collaborative Project
> > > > > > > > >
> > > > > > > >
> > > > > > > > The CTI devices are associated with other coresight components
> > > > using
> > > > > > > > csdev->ect_dev; These are not handled in the main linear trace
> > > > path as
> > > > > > > > these have a star topology interlinking all components.
> > > > > > > > However, when a component has an associated ect_dev then is
> > > > enabled at
> > > > > > > > the same time as the other component is enabled.
> > > > > > > >
> > > > > > > > So a module_get/put will be needed for this association to
> > > > > > > > prevent
> > > > the
> > > > > > > > CTI being removed while a trace session is active.
> > > > > > > > I suggest in coresight.c : coresight_control_assoc_ectdev()
> > > > > > > >
> > > > > > >
> > > > > > > In module unload process, devices of that module will be removed.
> > > > > > > In
> > > > > > > this case, cti_remove() is called on all cti device when unloading
> > > > > > > cti module. All cti connections is cleaned at that time.
> > > > > > > csdev->ect_dev is set to NULL. coresight_control_assoc_ectdev()
> > > > > > > will return since since ect_csdev is NULL.
> > > > > > >
> > > > > > > I think we are safe without holding module reference since cti
> > > > > > > driver has done a pretty good job on clean up.
> > > > > > >
> > > > > >
> > > > > > The issue here is not about clean up. We need to keep all the
> > > > > > programmed coresight modules loaded and operational while a
> > > > > > coresight
> > > > > > trace session is in progress. The CTI can be used to control the
> > > > > > generation of trace, trigger trace related events etc.
> > > > > > If the module is removed while a session is in progress then this
> > > > > > programming is lost and the trace data collected may not be what is
> > > > > > expected.
> > > > > >
> > > > >
> > > > > Got your point now. In my opinion, CTI is kinda different to other
> > > > > coresight components since it's optional.
> > > > >
> > > > > For other components, they have to been there when path is built or
> > > > > the enablement fails. Use can either get a successfully return which
> > > > > means it's good and all devices/modules are there until path is
> > > > > released, or a failed return which means trace session can't be
> > > > > setup.
> > > > >
> > > >
> > > > The module get/put do more than ensure that the system can be enabled.
> > > > They ensure that no components in the coresight path can be unloaded
> > > > until all paths in use are released. This ensures that clients do not
> > > > get the trace system pulled out from underneath - corrupting trace
> > > > data, and possibly affecting the client program.
> > > >
> > >
> > > I agree.
> > >
> > > > > In CTI case, there's no garrantee that CTI related to the trace
> > > > > session is there even the enablement is successful.
> > > >
> > > > And there is no guarantee that it is not programmed to perform a key
> > > > task in controlling the generation of trace.
> > > >
> > > > The entire programmed coresight system must be protected when in use.
> > > > That includes CTI devices. Otherwise the integrity of the captured
> > > > trace could be compromised, and indeed there may be no trace captured
> > > > if this is relying on CTI triggers.
> > > >
> > >
> > > That's correct.  If CTI is not there, the trace may be wrong or no
> > > trace as all.
> > >
> > > > >For example,
> > > > > One cti is required for ETM tracing session. If cti module is
> > > > > unloaded before enable that trace session. The enablement will
> > > > > still be successfully done since there's no ect_dev connected
> > > > > at all.
> > > > >
> > > > > My point is holding cti module reference in enable path won't
> > > > > fix all the problem. Altenatively, user should be aware that
> > > > > unload cti module leads to all cti functioniol failure.
> > > > > With current design, the behavior is consistant on cti. User
> > > > > can unload cti module whenever he wants but it will break
> > > > > the function.
> > > >
> > > > Precisely - and this is what should never be allowed - why can the CTI
> > > > be used to break the trace system but not other components?
> > > >
> > >
> >
> > Sorry - I may not have been clear here - I do not want to make having
> > a CTI mandatory for enabling the trace session - this is an optional
> > component.
> >
> > > We can't prevent user to unload CTI module when there's no active
> > > trace session.
> >
> > Agreed - the same applies to all the other components.
> >
> > > If we don't allow CTI to be not available, we need to
> > > check whether CTI is there in build_path routine or coresight enable
> > > routine. We need return failure if CTI is not there when user enables
> > > the trace session. Currently, coresight framework check associate CTI
> > > device, but if CTI deivce is not probed due to probe failure or
> > > module unload, coresight framework assume there's no assocaite CTI
> > > device and just continue the enablement.  It won't return failure.
> > >
> > > If we have plan to change CTI check to be mandatory, make the coresight
> > > enablement fail if CTI is not there like other trace components.  I'm
> > > perfect fine to add module_get/module_put for CTI.
> > >
> >
> > If the CTI is present at the time the trace path is created, then it
> > must be enabled, and a failure to enable the CTI fails the entire path
> > enable - this is the operation that is implemented by
> > coresight_control_assoc_ectdev() at this time.
> > If a CTI is not present at the time the trace path is created, then as
> > you have said previously, the ect_dev pointer will be NULL and the
> > check and enable of CTI will be skipped by
> > coresight_control_assoc_ectdev() and have no effect on the trace path.
> >
> > For the module version, if CTI is present and enabled then the module
> > must be locked so it cannot be removed while the trace is active -
> > this is the only change that is required. So adding get / put module
> > in coresight_control_assoc_ectdev() will lock the module in for the
> > duration that trace is active.
> >
> > I've tested the code below to ensure that the cti module is correctly
> > locked while trace is active, but can be removed as required when
> > trace is inactive.
> >
>
> Thanks a lot for coming up with the patch and testing it, Mike.
>
> > Regards
> >
> > Mike
> >
> > ---------------------------------------------------------------------------------
> > static int
> > coresight_control_assoc_ectdev(struct coresight_device *csdev, bool enable)
> > {
> >     int ect_ret = 0;
> >     struct coresight_device *ect_csdev = csdev->ect_dev;
> >     struct module *mod;
> >
> >     if (!ect_csdev)
> >         return 0;
> >
> >     if ((!ect_ops(ect_csdev)->enable) || (!ect_ops(ect_csdev)->disable))
> >         return 0;
> >
> >     mod = ect_csdev->dev.parent->driver->owner;
> >
> >     if (enable) {
> >         if (try_module_get(mod)) {
> >             ect_ret = ect_ops(ect_csdev)->enable(ect_csdev);
> >             if (ect_ret)
> >                 module_put(mod);
> >         } else
> >             ect_ret = -ENODEV;
> >     } else {
> >         module_put(mod);
> >         ect_ret = ect_ops(ect_csdev)->disable(ect_csdev);
> >     }
>
> If CTI module is removed before enablement of ETM and installed after ETM
> enablement and before ETM disablement. We have a unbalanced module
> reference number here. I made one test to prove this with above code.
>
> We may need move module_put() after disable(). Return proper error from
> disable() to indicate CTI was not enabled before so we can skip
> module_put().
>
> Tests:
> db845c:/sys/bus/coresight/devices # echo 1 > tmc_etr0/enable_sink
> db845c:/sys/bus/coresight/devices # echo 1 > etm1/enable_source
> db845c:/sys/bus/coresight/devices # rmmod coresight_cti
> rmmod: failed to unload coresight_cti: Try again
> 1|db845c:/sys/bus/coresight/devices # echo 0 > etm1/enable_source
> db845c:/sys/bus/coresight/devices # rmmod coresight_cti
> db845c:/sys/bus/coresight/devices # echo 1 > etm1/enable_source
> db845c:/sys/bus/coresight/devices # insmod /data/modules/coresight-cti.ko
> db845c:/sys/bus/coresight/devices # echo 1 > etm2/enable_source
> db845c:/sys/bus/coresight/devices # echo 0 > etm1/enable_source
> db845c:/sys/bus/coresight/devices # rmmod coresight_cti
> db845c:/sys/bus/coresight/devices #
>
> Above module removal should fail since etm2 is still active now.
>

Good point. Not sure this is sensible user operation, but it is
something that needs to be handled.
The issue goes beyond just the module count - the CTI driver has an
internal enable/disable refcount that is affected by this too.

I think the best way to handle this late CTI load is to flag which
Coresight devices have actually enabled the CTI, and only disable for
these.
So given a new flag in coresight_device, the following is possible:-

    if (enable) {
        if (try_module_get(mod)) {
            ect_ret = ect_ops(ect_csdev)->enable(ect_csdev);
            if (ect_ret)
                module_put(mod);
            else
                csdev->ect_enable = true;
        } else
            ect_ret = -ENODEV;
    } else {
        if (csdev->ect_enable) {
            module_put(mod);
            ect_ret = ect_ops(ect_csdev)->disable(ect_csdev);
            csdev->ect_enable = false;
        }
    }

During testing I have found that removing the etm4x module before the
cti module results in a crash. This is due to a bug in the cti driver
callback that removes sysfs links - and an issue in the coresight-core
where that callback is called.
I'll forward detail for all this tomorrow.

Regards

Mike





> >
> >     /* output warning if ECT enable is preventing trace operation */
> >     if (ect_ret)
> >         dev_info(&csdev->dev, "Associated ECT device (%s) %s failed\n",
> >              dev_name(&ect_csdev->dev),
> >              enable ? "enable" : "disable");
> >     return ect_ret;
> > }
> > -----------------------------------------------------------------------------------------
> > Tests:-
> > -----------------------
> > root@linaro-developer:/home/linaro/scripts# ./trace-sysfs-start.bash
> > Tracing via sysfs
> > enabling etf
> > wait before enable etm
> > enabling etm 0.
> > enabling etm 1.
> > enabling etm 2.
> > enabling etm 3.
> > waiting for trace
> > root@linaro-developer:/home/linaro/scripts# rmmod
> > ../cs-mods/coresight-etm4x.ko
> > rmmod: ERROR: Module coresight_etm4x is in use
> > root@linaro-developer:/home/linaro/scripts# rmmod
> > ../cs-mods/coresight-cti.ko
> > rmmod: ERROR: Module coresight_cti is in use
> > root@linaro-developer:/home/linaro/scripts# ./trace-sysfs-end.bash
> > ending trace
> > disabling etm 0.
> > disabling etm 1.
> > disabling etm 2.
> > disabling etm 3.
> > disabling etf
> > 16+0 records in
> > 16+0 records out
> > 8192 bytes (8.2 kB) copied, 0.000834116 s, 9.8 MB/s
> > root@linaro-developer:/home/linaro/scripts# rmmod
> > ../cs-mods/coresight-cti.ko
> > root@linaro-developer:/home/linaro/scripts# rmmod
> > ../cs-mods/coresight-etm4x.ko
> > root@linaro-developer:/home/linaro/scripts# insmod
> > ../cs-mods/coresight-etm4x.ko
> > root@linaro-developer:/home/linaro/scripts# ./trace-sysfs-start.bash
> > Tracing via sysfs
> > enabling etf
> > wait before enable etm
> > enabling etm 0.
> > enabling etm 1.
> > enabling etm 2.
> > enabling etm 3.
> > waiting for trace
> > root@linaro-developer:/home/linaro/scripts# rmmod
> > ../cs-mods/coresight-etm4x.ko
> > rmmod: ERROR: Module coresight_etm4x is in use
> > root@linaro-developer:/home/linaro/scripts# rmmod
> > ../cs-mods/coresight-cti.ko
> > rmmod: ERROR: Module coresight_cti is not currently loaded
> > root@linaro-developer:/home/linaro/scripts# ./trace-sysfs-end.bash
> > ending trace
> > disabling etm 0.
> > disabling etm 1.
> > disabling etm 2.
> > disabling etm 3.
> > disabling etf
> > 16+0 records in
> > 16+0 records out
> > 8192 bytes (8.2 kB) copied, 0.000819117 s, 10.0 MB/s
> >
> > }
> >
> >
> >
> > > > Moving forwards we will see users who simply use pre-programmed
> > > > settings from client programs such as perf. It may be that some users
> > > > do not have the credentials to add and remove modules - this could be
> > > > an admin function where larger systems with multiple sessions are in
> > > > use.
> > > >
> > > >
> > > > Regards
> > > >
> > > > Mike
> > > >
> > > >
> > > > >
> > > > > Thanks, Tingwei
> > > > >
> > > > > > Regards
> > > > > >
> > > > > > Mike
> > > > > >
> > > > > > > Let me know if you still think we need hold the module reference.
> > > > > > >
> > > > > > > Thanks, Tingwei
> > > > > > > > Regards
> > > > > > > >
> > > > > > > > Mike
> > > > > > > >
> > > > > > > >
> > > > > > > > --
> > > > > > > > Mike Leach
> > > > > > > > Principal Engineer, ARM Ltd.
> > > > > > > > Manchester Design Centre. UK
> > > > > > > >
> > > > > > > > _______________________________________________
> > > > > > > > linux-arm-kernel mailing list
> > > > > > > > linux-arm-kernel@lists.infradead.org
> > > > > > > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> > > > > >
> > > > > >
> > > > > >
> > > > > > --
> > > > > > Mike Leach
> > > > > > Principal Engineer, ARM Ltd.
> > > > > > Manchester Design Centre. UK
> > > > > >
> > > > > > _______________________________________________
> > > > > > linux-arm-kernel mailing list
> > > > > > linux-arm-kernel@lists.infradead.org
> > > > > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> > > >
> > > >
> > > >
> > > > --
> > > > Mike Leach
> > > > Principal Engineer, ARM Ltd.
> > > > Manchester Design Centre. UK
> > > >
> > > > _______________________________________________
> > > > linux-arm-kernel mailing list
> > > > linux-arm-kernel@lists.infradead.org
> > > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> >
> >
> >
> > --
> > Mike Leach
> > Principal Engineer, ARM Ltd.
> > Manchester Design Centre. UK



--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
diff mbox series

Patch

diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index b7151c5f81b1..17bc76ea86ae 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -640,7 +640,7 @@  struct coresight_device *coresight_get_sink_by_id(u32 id)
  * don't appear on the trace path, they should be handled along with the
  * the master device.
  */
-static void coresight_grab_device(struct coresight_device *csdev)
+static int coresight_grab_device(struct coresight_device *csdev)
 {
 	int i;
 
@@ -648,10 +648,25 @@  static void coresight_grab_device(struct coresight_device *csdev)
 		struct coresight_device *child;
 
 		child  = csdev->pdata->conns[i].child_dev;
-		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
+		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
+			if (!try_module_get(child->dev.parent->driver->owner))
+				goto err;
 			pm_runtime_get_sync(child->dev.parent);
+		}
 	}
+	if (!try_module_get(csdev->dev.parent->driver->owner))
+		goto err;
 	pm_runtime_get_sync(csdev->dev.parent);
+	return 0;
+err:
+	for (i--; i >= 0; i--) {
+		struct coresight_device *child;
+
+		child  = csdev->pdata->conns[i].child_dev;
+		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
+			pm_runtime_put(child->dev.parent);
+	}
+	return -ENODEV;
 }
 
 /*
@@ -663,12 +678,15 @@  static void coresight_drop_device(struct coresight_device *csdev)
 	int i;
 
 	pm_runtime_put(csdev->dev.parent);
+	module_put(csdev->dev.parent->driver->owner);
 	for (i = 0; i < csdev->pdata->nr_outport; i++) {
 		struct coresight_device *child;
 
 		child  = csdev->pdata->conns[i].child_dev;
-		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
+		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) {
 			pm_runtime_put(child->dev.parent);
+			module_put(child->dev.parent->driver->owner);
+		}
 	}
 }
 
@@ -721,7 +739,8 @@  static int _coresight_build_path(struct coresight_device *csdev,
 	if (!node)
 		return -ENOMEM;
 
-	coresight_grab_device(csdev);
+	if (coresight_grab_device(csdev))
+		return -ENODEV;
 	node->csdev = csdev;
 	list_add(&node->link, path);