diff mbox series

[v1] PM-runtime: Check supplier_preactivated before release supplier

Message ID 20220613120755.14306-1-peter.wang@mediatek.com (mailing list archive)
State Superseded, archived
Headers show
Series [v1] PM-runtime: Check supplier_preactivated before release supplier | expand

Commit Message

Peter Wang (王信友) June 13, 2022, 12:07 p.m. UTC
From: Peter Wang <peter.wang@mediatek.com>

With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
to prevent supplier enter suspend, pm_runtime_release_supplier should
check supplier_preactivated before let supplier enter suspend.

If the link is drop or release, bypass check supplier_preactivated.

Signed-off-by: Peter Wang <peter.wang@mediatek.com>
---
 drivers/base/core.c          |  2 +-
 drivers/base/power/runtime.c | 15 ++++++++++++---
 include/linux/pm_runtime.h   |  5 +++--
 3 files changed, 16 insertions(+), 6 deletions(-)

Comments

Peter Wang (王信友) June 22, 2022, 6:09 a.m. UTC | #1
Hi all,


gentle ping for this bug fix review.


Thanks.


On 6/13/22 8:07 PM, peter.wang@mediatek.com wrote:
> From: Peter Wang <peter.wang@mediatek.com>
>
> With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
> to prevent supplier enter suspend, pm_runtime_release_supplier should
> check supplier_preactivated before let supplier enter suspend.
>
> If the link is drop or release, bypass check supplier_preactivated.
>
> Signed-off-by: Peter Wang <peter.wang@mediatek.com>
> ---
>   drivers/base/core.c          |  2 +-
>   drivers/base/power/runtime.c | 15 ++++++++++++---
>   include/linux/pm_runtime.h   |  5 +++--
>   3 files changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index 7cd789c4985d..3b9cc559928f 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -486,7 +486,7 @@ static void device_link_release_fn(struct work_struct *work)
>   	/* Ensure that all references to the link object have been dropped. */
>   	device_link_synchronize_removal();
>   
> -	pm_runtime_release_supplier(link, true);
> +	pm_runtime_release_supplier(link, true, true);
>   
>   	put_device(link->consumer);
>   	put_device(link->supplier);
> diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> index 676dc72d912d..3c4f425937a1 100644
> --- a/drivers/base/power/runtime.c
> +++ b/drivers/base/power/runtime.c
> @@ -314,10 +314,19 @@ static int rpm_get_suppliers(struct device *dev)
>    * and if @check_idle is set, check if that device is idle (and so it can be
>    * suspended).
>    */
> -void pm_runtime_release_supplier(struct device_link *link, bool check_idle)
> +void pm_runtime_release_supplier(struct device_link *link, bool check_idle,
> +	bool drop)
>   {
>   	struct device *supplier = link->supplier;
>   
> +	/*
> +	 * When consumer hold supplier, supplier cannot enter suspend.
> +	 * Driect release supplier and let supplier enter suspend is not allow.
> +	 * Unless the link is drop, direct relsease supplier should be okay.
> +	 */
> +	if (link->supplier_preactivated && !drop)
> +		return;
> +
>   	/*
>   	 * The additional power.usage_count check is a safety net in case
>   	 * the rpm_active refcount becomes saturated, in which case
> @@ -338,7 +347,7 @@ static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend)
>   
>   	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
>   				device_links_read_lock_held())
> -		pm_runtime_release_supplier(link, try_to_suspend);
> +		pm_runtime_release_supplier(link, try_to_suspend, false);
>   }
>   
>   static void rpm_put_suppliers(struct device *dev)
> @@ -1838,7 +1847,7 @@ void pm_runtime_drop_link(struct device_link *link)
>   		return;
>   
>   	pm_runtime_drop_link_count(link->consumer);
> -	pm_runtime_release_supplier(link, true);
> +	pm_runtime_release_supplier(link, true, true);
>   }
>   
>   static bool pm_runtime_need_not_resume(struct device *dev)
> diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
> index 9e4d056967c6..354ffb1eaec0 100644
> --- a/include/linux/pm_runtime.h
> +++ b/include/linux/pm_runtime.h
> @@ -88,7 +88,8 @@ extern void pm_runtime_get_suppliers(struct device *dev);
>   extern void pm_runtime_put_suppliers(struct device *dev);
>   extern void pm_runtime_new_link(struct device *dev);
>   extern void pm_runtime_drop_link(struct device_link *link);
> -extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle);
> +extern void pm_runtime_release_supplier(struct device_link *link,
> +	bool check_idle, bool drop);
>   
>   extern int devm_pm_runtime_enable(struct device *dev);
>   
> @@ -315,7 +316,7 @@ static inline void pm_runtime_put_suppliers(struct device *dev) {}
>   static inline void pm_runtime_new_link(struct device *dev) {}
>   static inline void pm_runtime_drop_link(struct device_link *link) {}
>   static inline void pm_runtime_release_supplier(struct device_link *link,
> -					       bool check_idle) {}
> +					       bool check_idle, bool drop) {}
>   
>   #endif /* !CONFIG_PM */
>
Greg Kroah-Hartman June 22, 2022, 6:48 a.m. UTC | #2
A: http://en.wikipedia.org/wiki/Top_post
Q: Were do I find info about this thing called top-posting?
A: Because it messes up the order in which people normally read text.
Q: Why is top-posting such a bad thing?
A: Top-posting.
Q: What is the most annoying thing in e-mail?

A: No.
Q: Should I include quotations after my reply?


http://daringfireball.net/2007/07/on_top

On Wed, Jun 22, 2022 at 02:09:16PM +0800, Peter Wang wrote:
> Hi all,
> 
> 
> gentle ping for this bug fix review.

It's only been 1 week, please give us a chance.  To help out, always
feel free to review other patch submissions.

thanks,

greg k-h
Greg Kroah-Hartman June 27, 2022, 2:14 p.m. UTC | #3
On Mon, Jun 13, 2022 at 08:07:55PM +0800, peter.wang@mediatek.com wrote:
> From: Peter Wang <peter.wang@mediatek.com>
> 
> With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
> to prevent supplier enter suspend, pm_runtime_release_supplier should
> check supplier_preactivated before let supplier enter suspend.
> 
> If the link is drop or release, bypass check supplier_preactivated.
> 
> Signed-off-by: Peter Wang <peter.wang@mediatek.com>
> ---
>  drivers/base/core.c          |  2 +-
>  drivers/base/power/runtime.c | 15 ++++++++++++---
>  include/linux/pm_runtime.h   |  5 +++--
>  3 files changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index 7cd789c4985d..3b9cc559928f 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -486,7 +486,7 @@ static void device_link_release_fn(struct work_struct *work)
>  	/* Ensure that all references to the link object have been dropped. */
>  	device_link_synchronize_removal();
>  
> -	pm_runtime_release_supplier(link, true);
> +	pm_runtime_release_supplier(link, true, true);
>  
>  	put_device(link->consumer);
>  	put_device(link->supplier);
> diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> index 676dc72d912d..3c4f425937a1 100644
> --- a/drivers/base/power/runtime.c
> +++ b/drivers/base/power/runtime.c
> @@ -314,10 +314,19 @@ static int rpm_get_suppliers(struct device *dev)
>   * and if @check_idle is set, check if that device is idle (and so it can be
>   * suspended).
>   */
> -void pm_runtime_release_supplier(struct device_link *link, bool check_idle)
> +void pm_runtime_release_supplier(struct device_link *link, bool check_idle,
> +	bool drop)

This is just making this horrible api even worse.  Now there are 2
boolean flags required, 2 more than really should even be here at all.
Every time you see this function being used, you will now have to look
up the definition  to see what it really does.

Please make a new function that calls the internal function with the
flag set properly, so that it is obvious what is happening when the call
is made.

and really, the same thing should be done for the check_idle flag,
that's not good either.

thanks,

greg k-h
Rafael J. Wysocki June 27, 2022, 2:27 p.m. UTC | #4
On Mon, Jun 27, 2022 at 4:14 PM Greg KH <gregkh@linuxfoundation.org> wrote:
>
> On Mon, Jun 13, 2022 at 08:07:55PM +0800, peter.wang@mediatek.com wrote:
> > From: Peter Wang <peter.wang@mediatek.com>
> >
> > With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
> > to prevent supplier enter suspend, pm_runtime_release_supplier should
> > check supplier_preactivated before let supplier enter suspend.
> >
> > If the link is drop or release, bypass check supplier_preactivated.
> >
> > Signed-off-by: Peter Wang <peter.wang@mediatek.com>
> > ---
> >  drivers/base/core.c          |  2 +-
> >  drivers/base/power/runtime.c | 15 ++++++++++++---
> >  include/linux/pm_runtime.h   |  5 +++--
> >  3 files changed, 16 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/base/core.c b/drivers/base/core.c
> > index 7cd789c4985d..3b9cc559928f 100644
> > --- a/drivers/base/core.c
> > +++ b/drivers/base/core.c
> > @@ -486,7 +486,7 @@ static void device_link_release_fn(struct work_struct *work)
> >       /* Ensure that all references to the link object have been dropped. */
> >       device_link_synchronize_removal();
> >
> > -     pm_runtime_release_supplier(link, true);
> > +     pm_runtime_release_supplier(link, true, true);
> >
> >       put_device(link->consumer);
> >       put_device(link->supplier);
> > diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> > index 676dc72d912d..3c4f425937a1 100644
> > --- a/drivers/base/power/runtime.c
> > +++ b/drivers/base/power/runtime.c
> > @@ -314,10 +314,19 @@ static int rpm_get_suppliers(struct device *dev)
> >   * and if @check_idle is set, check if that device is idle (and so it can be
> >   * suspended).
> >   */
> > -void pm_runtime_release_supplier(struct device_link *link, bool check_idle)
> > +void pm_runtime_release_supplier(struct device_link *link, bool check_idle,
> > +     bool drop)
>
> This is just making this horrible api even worse.  Now there are 2
> boolean flags required, 2 more than really should even be here at all.
> Every time you see this function being used, you will now have to look
> up the definition  to see what it really does.
>
> Please make a new function that calls the internal function with the
> flag set properly, so that it is obvious what is happening when the call
> is made.
>
> and really, the same thing should be done for the check_idle flag,
> that's not good either.

Agreed, and let me take care of this.
Rafael J. Wysocki June 27, 2022, 7 p.m. UTC | #5
On Mon, Jun 13, 2022 at 2:08 PM <peter.wang@mediatek.com> wrote:
>
> From: Peter Wang <peter.wang@mediatek.com>
>
> With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
> to prevent supplier enter suspend, pm_runtime_release_supplier should
> check supplier_preactivated before let supplier enter suspend.

Why?

> If the link is drop or release, bypass check supplier_preactivated.
>
> Signed-off-by: Peter Wang <peter.wang@mediatek.com>
> ---
>  drivers/base/core.c          |  2 +-
>  drivers/base/power/runtime.c | 15 ++++++++++++---
>  include/linux/pm_runtime.h   |  5 +++--
>  3 files changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index 7cd789c4985d..3b9cc559928f 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -486,7 +486,7 @@ static void device_link_release_fn(struct work_struct *work)
>         /* Ensure that all references to the link object have been dropped. */
>         device_link_synchronize_removal();
>
> -       pm_runtime_release_supplier(link, true);
> +       pm_runtime_release_supplier(link, true, true);
>
>         put_device(link->consumer);
>         put_device(link->supplier);
> diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> index 676dc72d912d..3c4f425937a1 100644
> --- a/drivers/base/power/runtime.c
> +++ b/drivers/base/power/runtime.c
> @@ -314,10 +314,19 @@ static int rpm_get_suppliers(struct device *dev)
>   * and if @check_idle is set, check if that device is idle (and so it can be
>   * suspended).
>   */
> -void pm_runtime_release_supplier(struct device_link *link, bool check_idle)
> +void pm_runtime_release_supplier(struct device_link *link, bool check_idle,
> +       bool drop)
>  {
>         struct device *supplier = link->supplier;
>
> +       /*
> +        * When consumer hold supplier, supplier cannot enter suspend.
> +        * Driect release supplier and let supplier enter suspend is not allow.
> +        * Unless the link is drop, direct relsease supplier should be okay.
> +        */
> +       if (link->supplier_preactivated && !drop)
> +               return;
> +
>         /*
>          * The additional power.usage_count check is a safety net in case
>          * the rpm_active refcount becomes saturated, in which case
> @@ -338,7 +347,7 @@ static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend)
>
>         list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
>                                 device_links_read_lock_held())
> -               pm_runtime_release_supplier(link, try_to_suspend);
> +               pm_runtime_release_supplier(link, try_to_suspend, false);
>  }
>
>  static void rpm_put_suppliers(struct device *dev)
> @@ -1838,7 +1847,7 @@ void pm_runtime_drop_link(struct device_link *link)
>                 return;
>
>         pm_runtime_drop_link_count(link->consumer);
> -       pm_runtime_release_supplier(link, true);
> +       pm_runtime_release_supplier(link, true, true);
>  }
>
>  static bool pm_runtime_need_not_resume(struct device *dev)
> diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
> index 9e4d056967c6..354ffb1eaec0 100644
> --- a/include/linux/pm_runtime.h
> +++ b/include/linux/pm_runtime.h
> @@ -88,7 +88,8 @@ extern void pm_runtime_get_suppliers(struct device *dev);
>  extern void pm_runtime_put_suppliers(struct device *dev);
>  extern void pm_runtime_new_link(struct device *dev);
>  extern void pm_runtime_drop_link(struct device_link *link);
> -extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle);
> +extern void pm_runtime_release_supplier(struct device_link *link,
> +       bool check_idle, bool drop);
>
>  extern int devm_pm_runtime_enable(struct device *dev);
>
> @@ -315,7 +316,7 @@ static inline void pm_runtime_put_suppliers(struct device *dev) {}
>  static inline void pm_runtime_new_link(struct device *dev) {}
>  static inline void pm_runtime_drop_link(struct device_link *link) {}
>  static inline void pm_runtime_release_supplier(struct device_link *link,
> -                                              bool check_idle) {}
> +                                              bool check_idle, bool drop) {}
>
>  #endif /* !CONFIG_PM */
>
> --
> 2.18.0
>
Peter Wang (王信友) June 28, 2022, 1:49 a.m. UTC | #6
On 6/27/22 10:14 PM, Greg KH wrote:
> On Mon, Jun 13, 2022 at 08:07:55PM +0800, peter.wang@mediatek.com wrote:
>> From: Peter Wang <peter.wang@mediatek.com>
>>
>> With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
>> to prevent supplier enter suspend, pm_runtime_release_supplier should
>> check supplier_preactivated before let supplier enter suspend.
>>
>> If the link is drop or release, bypass check supplier_preactivated.
>>
>> Signed-off-by: Peter Wang <peter.wang@mediatek.com>
>> ---
>>   drivers/base/core.c          |  2 +-
>>   drivers/base/power/runtime.c | 15 ++++++++++++---
>>   include/linux/pm_runtime.h   |  5 +++--
>>   3 files changed, 16 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/base/core.c b/drivers/base/core.c
>> index 7cd789c4985d..3b9cc559928f 100644
>> --- a/drivers/base/core.c
>> +++ b/drivers/base/core.c
>> @@ -486,7 +486,7 @@ static void device_link_release_fn(struct work_struct *work)
>>   	/* Ensure that all references to the link object have been dropped. */
>>   	device_link_synchronize_removal();
>>   
>> -	pm_runtime_release_supplier(link, true);
>> +	pm_runtime_release_supplier(link, true, true);
>>   
>>   	put_device(link->consumer);
>>   	put_device(link->supplier);
>> diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
>> index 676dc72d912d..3c4f425937a1 100644
>> --- a/drivers/base/power/runtime.c
>> +++ b/drivers/base/power/runtime.c
>> @@ -314,10 +314,19 @@ static int rpm_get_suppliers(struct device *dev)
>>    * and if @check_idle is set, check if that device is idle (and so it can be
>>    * suspended).
>>    */
>> -void pm_runtime_release_supplier(struct device_link *link, bool check_idle)
>> +void pm_runtime_release_supplier(struct device_link *link, bool check_idle,
>> +	bool drop)
> This is just making this horrible api even worse.  Now there are 2
> boolean flags required, 2 more than really should even be here at all.
> Every time you see this function being used, you will now have to look
> up the definition  to see what it really does.
>
> Please make a new function that calls the internal function with the
> flag set properly, so that it is obvious what is happening when the call
> is made.
>
> and really, the same thing should be done for the check_idle flag,
> that's not good either.
>
> thanks,

Hi Gerg,

Good point! you are right, I wont change api next version

Thank you for review


> greg k-h
Peter Wang (王信友) June 28, 2022, 1:53 a.m. UTC | #7
On 6/28/22 3:00 AM, Rafael J. Wysocki wrote:
> On Mon, Jun 13, 2022 at 2:08 PM <peter.wang@mediatek.com> wrote:
>> From: Peter Wang <peter.wang@mediatek.com>
>>
>> With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
>> to prevent supplier enter suspend, pm_runtime_release_supplier should
>> check supplier_preactivated before let supplier enter suspend.
> Why?

because supplier_preactivated is true means supplier cannot enter 
suspend, right?


>> If the link is drop or release, bypass check supplier_preactivated.
>>
>> Signed-off-by: Peter Wang <peter.wang@mediatek.com>
>> ---
>>   drivers/base/core.c          |  2 +-
>>   drivers/base/power/runtime.c | 15 ++++++++++++---
>>   include/linux/pm_runtime.h   |  5 +++--
>>   3 files changed, 16 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/base/core.c b/drivers/base/core.c
>> index 7cd789c4985d..3b9cc559928f 100644
>> --- a/drivers/base/core.c
>> +++ b/drivers/base/core.c
>> @@ -486,7 +486,7 @@ static void device_link_release_fn(struct work_struct *work)
>>          /* Ensure that all references to the link object have been dropped. */
>>          device_link_synchronize_removal();
>>
>> -       pm_runtime_release_supplier(link, true);
>> +       pm_runtime_release_supplier(link, true, true);
>>
>>          put_device(link->consumer);
>>          put_device(link->supplier);
>> diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
>> index 676dc72d912d..3c4f425937a1 100644
>> --- a/drivers/base/power/runtime.c
>> +++ b/drivers/base/power/runtime.c
>> @@ -314,10 +314,19 @@ static int rpm_get_suppliers(struct device *dev)
>>    * and if @check_idle is set, check if that device is idle (and so it can be
>>    * suspended).
>>    */
>> -void pm_runtime_release_supplier(struct device_link *link, bool check_idle)
>> +void pm_runtime_release_supplier(struct device_link *link, bool check_idle,
>> +       bool drop)
>>   {
>>          struct device *supplier = link->supplier;
>>
>> +       /*
>> +        * When consumer hold supplier, supplier cannot enter suspend.
>> +        * Driect release supplier and let supplier enter suspend is not allow.
>> +        * Unless the link is drop, direct relsease supplier should be okay.
>> +        */
>> +       if (link->supplier_preactivated && !drop)
>> +               return;
>> +
>>          /*
>>           * The additional power.usage_count check is a safety net in case
>>           * the rpm_active refcount becomes saturated, in which case
>> @@ -338,7 +347,7 @@ static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend)
>>
>>          list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
>>                                  device_links_read_lock_held())
>> -               pm_runtime_release_supplier(link, try_to_suspend);
>> +               pm_runtime_release_supplier(link, try_to_suspend, false);
>>   }
>>
>>   static void rpm_put_suppliers(struct device *dev)
>> @@ -1838,7 +1847,7 @@ void pm_runtime_drop_link(struct device_link *link)
>>                  return;
>>
>>          pm_runtime_drop_link_count(link->consumer);
>> -       pm_runtime_release_supplier(link, true);
>> +       pm_runtime_release_supplier(link, true, true);
>>   }
>>
>>   static bool pm_runtime_need_not_resume(struct device *dev)
>> diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
>> index 9e4d056967c6..354ffb1eaec0 100644
>> --- a/include/linux/pm_runtime.h
>> +++ b/include/linux/pm_runtime.h
>> @@ -88,7 +88,8 @@ extern void pm_runtime_get_suppliers(struct device *dev);
>>   extern void pm_runtime_put_suppliers(struct device *dev);
>>   extern void pm_runtime_new_link(struct device *dev);
>>   extern void pm_runtime_drop_link(struct device_link *link);
>> -extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle);
>> +extern void pm_runtime_release_supplier(struct device_link *link,
>> +       bool check_idle, bool drop);
>>
>>   extern int devm_pm_runtime_enable(struct device *dev);
>>
>> @@ -315,7 +316,7 @@ static inline void pm_runtime_put_suppliers(struct device *dev) {}
>>   static inline void pm_runtime_new_link(struct device *dev) {}
>>   static inline void pm_runtime_drop_link(struct device_link *link) {}
>>   static inline void pm_runtime_release_supplier(struct device_link *link,
>> -                                              bool check_idle) {}
>> +                                              bool check_idle, bool drop) {}
>>
>>   #endif /* !CONFIG_PM */
>>
>> --
>> 2.18.0
>>
Rafael J. Wysocki June 28, 2022, 3:54 p.m. UTC | #8
On Tue, Jun 28, 2022 at 3:53 AM Peter Wang <peter.wang@mediatek.com> wrote:
>
>
> On 6/28/22 3:00 AM, Rafael J. Wysocki wrote:
> > On Mon, Jun 13, 2022 at 2:08 PM <peter.wang@mediatek.com> wrote:
> >> From: Peter Wang <peter.wang@mediatek.com>
> >>
> >> With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
> >> to prevent supplier enter suspend, pm_runtime_release_supplier should
> >> check supplier_preactivated before let supplier enter suspend.
> > Why?
>
> because supplier_preactivated is true means supplier cannot enter
> suspend, right?

No, it doesn't mean that.
Rafael J. Wysocki June 29, 2022, 4:01 p.m. UTC | #9
[Add CCs to linix-pm, LKML and Greg]

On Wednesday, June 29, 2022 5:32:00 PM CEST Rafael J. Wysocki wrote:
> On Wed, Jun 29, 2022 at 4:47 PM Peter Wang <peter.wang@mediatek.com> wrote:
> >
> >
> > On 6/29/22 9:22 PM, Rafael J. Wysocki wrote:
> > > On Wed, Jun 29, 2022 at 5:02 AM Peter Wang <peter.wang@mediatek.com> wrote:
> > >>
> > >> On 6/28/22 11:54 PM, Rafael J. Wysocki wrote:
> > >>> On Tue, Jun 28, 2022 at 3:53 AM Peter Wang <peter.wang@mediatek.com> wrote:
> > >>>> On 6/28/22 3:00 AM, Rafael J. Wysocki wrote:
> > >>>>> On Mon, Jun 13, 2022 at 2:08 PM <peter.wang@mediatek.com> wrote:
> > >>>>>> From: Peter Wang <peter.wang@mediatek.com>
> > >>>>>>
> > >>>>>> With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
> > >>>>>> to prevent supplier enter suspend, pm_runtime_release_supplier should
> > >>>>>> check supplier_preactivated before let supplier enter suspend.
> > >>>>> Why?
> > >>>> because supplier_preactivated is true means supplier cannot enter
> > >>>> suspend, right?
> > >>> No, it doesn't mean that.
> > >> Hi Rafael,
> > >>
> > >> if supplier_preactivated is true, means someone call
> > >> pm_runtime_get_suppliers and
> > >> before pm_runtime_put_suppliers right? This section suppliers should not
> > >> enter suspend.
> > > No, this is not how this is expected to work.
> > >
> > > First off, the only caller of pm_runtime_get_suppliers() and
> > > pm_runtime_put_suppliers() is __driver_probe_device().  Really nobody
> > > else has any business that would require calling them.
> > Hi Rafael,
> >
> > Yes, you are right!
> > __driver_probe_device the only one use and just because
> > __driver_probe_device use
> > pm_runtime_get_suppliers cause problem.
> >
> >
> > > Second, the role of pm_runtime_get_suppliers() is to "preactivate" the
> > > suppliers before running probe for a consumer device and the role of
> >
> > the role of pm_runtime_get_suppliers() is to "preactivate" the suppliers,
> > but suppliers may suspend immediately after preactivate right?
> > Here is just this case. this is first racing point.
> > Thread A: pm_runtime_get_suppliers                -> __driver_probe_device
> > Thread B: pm_runtime_release_supplier
> > Thread A: Run with supplier not preactivate      -> __driver_probe_device
> >
> > > pm_runtime_put_suppliers() is to do the cleanup in case the device is
> > > left in suspend after probing.
> > >
> > > IOW, pm_runtime_get_suppliers() is to ensure that the suppliers will
> > > be active until the probe callback takes over and the rest depends on
> > > that callback.
> >
> > The problem of this racing will finally let consumer is active but
> > supplier is suspended.
> 
> So it would be better to send a bug report regarding this.
> 
> > The link relation is broken.
> > I know you may curious how it happened? right?
> > Honestly, I am not sure, but I think the second racing point
> > is rpm_get_suppliers and pm_runtime_put_suppliers(release rpm_active).
> 
> I'm not sure what you mean by "the racing point".
> 
> Yes, these functions can run concurrently.
> 
> > So, I try to fix the first racing point and the problem is gone.
> > It is full meet expect, and the pm runtime will work smoothly after
> > __driver_probe_device done.
> 
> I'm almost sure that there is at least one scenario that would be
> broken by this change.

That said, the code in there may be a bit overdesigned.

Does the patch below help?

---
 drivers/base/power/runtime.c |   14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

Index: linux-pm/drivers/base/power/runtime.c
===================================================================
--- linux-pm.orig/drivers/base/power/runtime.c
+++ linux-pm/drivers/base/power/runtime.c
@@ -1768,7 +1768,6 @@ void pm_runtime_get_suppliers(struct dev
 		if (link->flags & DL_FLAG_PM_RUNTIME) {
 			link->supplier_preactivated = true;
 			pm_runtime_get_sync(link->supplier);
-			refcount_inc(&link->rpm_active);
 		}
 
 	device_links_read_unlock(idx);
@@ -1788,19 +1787,8 @@ void pm_runtime_put_suppliers(struct dev
 	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
 				device_links_read_lock_held())
 		if (link->supplier_preactivated) {
-			bool put;
-
 			link->supplier_preactivated = false;
-
-			spin_lock_irq(&dev->power.lock);
-
-			put = pm_runtime_status_suspended(dev) &&
-			      refcount_dec_not_one(&link->rpm_active);
-
-			spin_unlock_irq(&dev->power.lock);
-
-			if (put)
-				pm_runtime_put(link->supplier);
+			pm_runtime_put(link->supplier);
 		}
 
 	device_links_read_unlock(idx);
Peter Wang (王信友) June 30, 2022, 2:26 p.m. UTC | #10
On 6/30/22 12:01 AM, Rafael J. Wysocki wrote:
> [Add CCs to linix-pm, LKML and Greg]
>
> On Wednesday, June 29, 2022 5:32:00 PM CEST Rafael J. Wysocki wrote:
>> On Wed, Jun 29, 2022 at 4:47 PM Peter Wang <peter.wang@mediatek.com> wrote:
>>>
>>> On 6/29/22 9:22 PM, Rafael J. Wysocki wrote:
>>>> On Wed, Jun 29, 2022 at 5:02 AM Peter Wang <peter.wang@mediatek.com> wrote:
>>>>> On 6/28/22 11:54 PM, Rafael J. Wysocki wrote:
>>>>>> On Tue, Jun 28, 2022 at 3:53 AM Peter Wang <peter.wang@mediatek.com> wrote:
>>>>>>> On 6/28/22 3:00 AM, Rafael J. Wysocki wrote:
>>>>>>>> On Mon, Jun 13, 2022 at 2:08 PM <peter.wang@mediatek.com> wrote:
>>>>>>>>> From: Peter Wang <peter.wang@mediatek.com>
>>>>>>>>>
>>>>>>>>> With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
>>>>>>>>> to prevent supplier enter suspend, pm_runtime_release_supplier should
>>>>>>>>> check supplier_preactivated before let supplier enter suspend.
>>>>>>>> Why?
>>>>>>> because supplier_preactivated is true means supplier cannot enter
>>>>>>> suspend, right?
>>>>>> No, it doesn't mean that.
>>>>> Hi Rafael,
>>>>>
>>>>> if supplier_preactivated is true, means someone call
>>>>> pm_runtime_get_suppliers and
>>>>> before pm_runtime_put_suppliers right? This section suppliers should not
>>>>> enter suspend.
>>>> No, this is not how this is expected to work.
>>>>
>>>> First off, the only caller of pm_runtime_get_suppliers() and
>>>> pm_runtime_put_suppliers() is __driver_probe_device().  Really nobody
>>>> else has any business that would require calling them.
>>> Hi Rafael,
>>>
>>> Yes, you are right!
>>> __driver_probe_device the only one use and just because
>>> __driver_probe_device use
>>> pm_runtime_get_suppliers cause problem.
>>>
>>>
>>>> Second, the role of pm_runtime_get_suppliers() is to "preactivate" the
>>>> suppliers before running probe for a consumer device and the role of
>>> the role of pm_runtime_get_suppliers() is to "preactivate" the suppliers,
>>> but suppliers may suspend immediately after preactivate right?
>>> Here is just this case. this is first racing point.
>>> Thread A: pm_runtime_get_suppliers                -> __driver_probe_device
>>> Thread B: pm_runtime_release_supplier
>>> Thread A: Run with supplier not preactivate      -> __driver_probe_device
>>>
>>>> pm_runtime_put_suppliers() is to do the cleanup in case the device is
>>>> left in suspend after probing.
>>>>
>>>> IOW, pm_runtime_get_suppliers() is to ensure that the suppliers will
>>>> be active until the probe callback takes over and the rest depends on
>>>> that callback.
>>> The problem of this racing will finally let consumer is active but
>>> supplier is suspended.
>> So it would be better to send a bug report regarding this.
>>
>>> The link relation is broken.
>>> I know you may curious how it happened? right?
>>> Honestly, I am not sure, but I think the second racing point
>>> is rpm_get_suppliers and pm_runtime_put_suppliers(release rpm_active).
>> I'm not sure what you mean by "the racing point".
>>
>> Yes, these functions can run concurrently.
>>
>>> So, I try to fix the first racing point and the problem is gone.
>>> It is full meet expect, and the pm runtime will work smoothly after
>>> __driver_probe_device done.
>> I'm almost sure that there is at least one scenario that would be
>> broken by this change.
> That said, the code in there may be a bit overdesigned.
>
> Does the patch below help?
>
> ---
>   drivers/base/power/runtime.c |   14 +-------------
>   1 file changed, 1 insertion(+), 13 deletions(-)
>
> Index: linux-pm/drivers/base/power/runtime.c
> ===================================================================
> --- linux-pm.orig/drivers/base/power/runtime.c
> +++ linux-pm/drivers/base/power/runtime.c
> @@ -1768,7 +1768,6 @@ void pm_runtime_get_suppliers(struct dev
>   		if (link->flags & DL_FLAG_PM_RUNTIME) {
>   			link->supplier_preactivated = true;
>   			pm_runtime_get_sync(link->supplier);
> -			refcount_inc(&link->rpm_active);
>   		}
>   
>   	device_links_read_unlock(idx);
> @@ -1788,19 +1787,8 @@ void pm_runtime_put_suppliers(struct dev
>   	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
>   				device_links_read_lock_held())
>   		if (link->supplier_preactivated) {
> -			bool put;
> -
>   			link->supplier_preactivated = false;
> -
> -			spin_lock_irq(&dev->power.lock);
> -
> -			put = pm_runtime_status_suspended(dev) &&
> -			      refcount_dec_not_one(&link->rpm_active);
> -
> -			spin_unlock_irq(&dev->power.lock);
> -
> -			if (put)
> -				pm_runtime_put(link->supplier);
> +			pm_runtime_put(link->supplier);
>   		}
>   
>   	device_links_read_unlock(idx);


Hi Rafael,

I think this patch solve the rpm_active racing problem.
But it still have problem that
pm_runtime_get_suppliers call pm_runtime_get_sync(link->supplier)
and supplier could suspend immediately by other thread who call
pm_runtime_release_supplier.

Thanks.
Peter


>
>
Rafael J. Wysocki June 30, 2022, 2:47 p.m. UTC | #11
On Thu, Jun 30, 2022 at 4:26 PM Peter Wang <peter.wang@mediatek.com> wrote:
>
>
> On 6/30/22 12:01 AM, Rafael J. Wysocki wrote:
> > [Add CCs to linix-pm, LKML and Greg]
> >
> > On Wednesday, June 29, 2022 5:32:00 PM CEST Rafael J. Wysocki wrote:
> >> On Wed, Jun 29, 2022 at 4:47 PM Peter Wang <peter.wang@mediatek.com> wrote:
> >>>
> >>> On 6/29/22 9:22 PM, Rafael J. Wysocki wrote:
> >>>> On Wed, Jun 29, 2022 at 5:02 AM Peter Wang <peter.wang@mediatek.com> wrote:
> >>>>> On 6/28/22 11:54 PM, Rafael J. Wysocki wrote:
> >>>>>> On Tue, Jun 28, 2022 at 3:53 AM Peter Wang <peter.wang@mediatek.com> wrote:
> >>>>>>> On 6/28/22 3:00 AM, Rafael J. Wysocki wrote:
> >>>>>>>> On Mon, Jun 13, 2022 at 2:08 PM <peter.wang@mediatek.com> wrote:
> >>>>>>>>> From: Peter Wang <peter.wang@mediatek.com>
> >>>>>>>>>
> >>>>>>>>> With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
> >>>>>>>>> to prevent supplier enter suspend, pm_runtime_release_supplier should
> >>>>>>>>> check supplier_preactivated before let supplier enter suspend.
> >>>>>>>> Why?
> >>>>>>> because supplier_preactivated is true means supplier cannot enter
> >>>>>>> suspend, right?
> >>>>>> No, it doesn't mean that.
> >>>>> Hi Rafael,
> >>>>>
> >>>>> if supplier_preactivated is true, means someone call
> >>>>> pm_runtime_get_suppliers and
> >>>>> before pm_runtime_put_suppliers right? This section suppliers should not
> >>>>> enter suspend.
> >>>> No, this is not how this is expected to work.
> >>>>
> >>>> First off, the only caller of pm_runtime_get_suppliers() and
> >>>> pm_runtime_put_suppliers() is __driver_probe_device().  Really nobody
> >>>> else has any business that would require calling them.
> >>> Hi Rafael,
> >>>
> >>> Yes, you are right!
> >>> __driver_probe_device the only one use and just because
> >>> __driver_probe_device use
> >>> pm_runtime_get_suppliers cause problem.
> >>>
> >>>
> >>>> Second, the role of pm_runtime_get_suppliers() is to "preactivate" the
> >>>> suppliers before running probe for a consumer device and the role of
> >>> the role of pm_runtime_get_suppliers() is to "preactivate" the suppliers,
> >>> but suppliers may suspend immediately after preactivate right?
> >>> Here is just this case. this is first racing point.
> >>> Thread A: pm_runtime_get_suppliers                -> __driver_probe_device
> >>> Thread B: pm_runtime_release_supplier
> >>> Thread A: Run with supplier not preactivate      -> __driver_probe_device
> >>>
> >>>> pm_runtime_put_suppliers() is to do the cleanup in case the device is
> >>>> left in suspend after probing.
> >>>>
> >>>> IOW, pm_runtime_get_suppliers() is to ensure that the suppliers will
> >>>> be active until the probe callback takes over and the rest depends on
> >>>> that callback.
> >>> The problem of this racing will finally let consumer is active but
> >>> supplier is suspended.
> >> So it would be better to send a bug report regarding this.
> >>
> >>> The link relation is broken.
> >>> I know you may curious how it happened? right?
> >>> Honestly, I am not sure, but I think the second racing point
> >>> is rpm_get_suppliers and pm_runtime_put_suppliers(release rpm_active).
> >> I'm not sure what you mean by "the racing point".
> >>
> >> Yes, these functions can run concurrently.
> >>
> >>> So, I try to fix the first racing point and the problem is gone.
> >>> It is full meet expect, and the pm runtime will work smoothly after
> >>> __driver_probe_device done.
> >> I'm almost sure that there is at least one scenario that would be
> >> broken by this change.
> > That said, the code in there may be a bit overdesigned.
> >
> > Does the patch below help?
> >
> > ---
> >   drivers/base/power/runtime.c |   14 +-------------
> >   1 file changed, 1 insertion(+), 13 deletions(-)
> >
> > Index: linux-pm/drivers/base/power/runtime.c
> > ===================================================================
> > --- linux-pm.orig/drivers/base/power/runtime.c
> > +++ linux-pm/drivers/base/power/runtime.c
> > @@ -1768,7 +1768,6 @@ void pm_runtime_get_suppliers(struct dev
> >               if (link->flags & DL_FLAG_PM_RUNTIME) {
> >                       link->supplier_preactivated = true;
> >                       pm_runtime_get_sync(link->supplier);
> > -                     refcount_inc(&link->rpm_active);
> >               }
> >
> >       device_links_read_unlock(idx);
> > @@ -1788,19 +1787,8 @@ void pm_runtime_put_suppliers(struct dev
> >       list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
> >                               device_links_read_lock_held())
> >               if (link->supplier_preactivated) {
> > -                     bool put;
> > -
> >                       link->supplier_preactivated = false;
> > -
> > -                     spin_lock_irq(&dev->power.lock);
> > -
> > -                     put = pm_runtime_status_suspended(dev) &&
> > -                           refcount_dec_not_one(&link->rpm_active);
> > -
> > -                     spin_unlock_irq(&dev->power.lock);
> > -
> > -                     if (put)
> > -                             pm_runtime_put(link->supplier);
> > +                     pm_runtime_put(link->supplier);
> >               }
> >
> >       device_links_read_unlock(idx);
>
>
> Hi Rafael,
>
> I think this patch solve the rpm_active racing problem.
> But it still have problem that
> pm_runtime_get_suppliers call pm_runtime_get_sync(link->supplier)
> and supplier could suspend immediately by other thread who call
> pm_runtime_release_supplier.

No, it won't, because pm_runtime_release_supplier() won't drop the
reference on the supplier taken by pm_runtime_get_suppliers(0 after
the patch.
Peter Wang (王信友) June 30, 2022, 3:19 p.m. UTC | #12
On 6/30/22 10:47 PM, Rafael J. Wysocki wrote:
> On Thu, Jun 30, 2022 at 4:26 PM Peter Wang <peter.wang@mediatek.com> wrote:
>>
>> On 6/30/22 12:01 AM, Rafael J. Wysocki wrote:
>>> [Add CCs to linix-pm, LKML and Greg]
>>>
>>> On Wednesday, June 29, 2022 5:32:00 PM CEST Rafael J. Wysocki wrote:
>>>> On Wed, Jun 29, 2022 at 4:47 PM Peter Wang <peter.wang@mediatek.com> wrote:
>>>>> On 6/29/22 9:22 PM, Rafael J. Wysocki wrote:
>>>>>> On Wed, Jun 29, 2022 at 5:02 AM Peter Wang <peter.wang@mediatek.com> wrote:
>>>>>>> On 6/28/22 11:54 PM, Rafael J. Wysocki wrote:
>>>>>>>> On Tue, Jun 28, 2022 at 3:53 AM Peter Wang <peter.wang@mediatek.com> wrote:
>>>>>>>>> On 6/28/22 3:00 AM, Rafael J. Wysocki wrote:
>>>>>>>>>> On Mon, Jun 13, 2022 at 2:08 PM <peter.wang@mediatek.com> wrote:
>>>>>>>>>>> From: Peter Wang <peter.wang@mediatek.com>
>>>>>>>>>>>
>>>>>>>>>>> With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
>>>>>>>>>>> to prevent supplier enter suspend, pm_runtime_release_supplier should
>>>>>>>>>>> check supplier_preactivated before let supplier enter suspend.
>>>>>>>>>> Why?
>>>>>>>>> because supplier_preactivated is true means supplier cannot enter
>>>>>>>>> suspend, right?
>>>>>>>> No, it doesn't mean that.
>>>>>>> Hi Rafael,
>>>>>>>
>>>>>>> if supplier_preactivated is true, means someone call
>>>>>>> pm_runtime_get_suppliers and
>>>>>>> before pm_runtime_put_suppliers right? This section suppliers should not
>>>>>>> enter suspend.
>>>>>> No, this is not how this is expected to work.
>>>>>>
>>>>>> First off, the only caller of pm_runtime_get_suppliers() and
>>>>>> pm_runtime_put_suppliers() is __driver_probe_device().  Really nobody
>>>>>> else has any business that would require calling them.
>>>>> Hi Rafael,
>>>>>
>>>>> Yes, you are right!
>>>>> __driver_probe_device the only one use and just because
>>>>> __driver_probe_device use
>>>>> pm_runtime_get_suppliers cause problem.
>>>>>
>>>>>
>>>>>> Second, the role of pm_runtime_get_suppliers() is to "preactivate" the
>>>>>> suppliers before running probe for a consumer device and the role of
>>>>> the role of pm_runtime_get_suppliers() is to "preactivate" the suppliers,
>>>>> but suppliers may suspend immediately after preactivate right?
>>>>> Here is just this case. this is first racing point.
>>>>> Thread A: pm_runtime_get_suppliers                -> __driver_probe_device
>>>>> Thread B: pm_runtime_release_supplier
>>>>> Thread A: Run with supplier not preactivate      -> __driver_probe_device
>>>>>
>>>>>> pm_runtime_put_suppliers() is to do the cleanup in case the device is
>>>>>> left in suspend after probing.
>>>>>>
>>>>>> IOW, pm_runtime_get_suppliers() is to ensure that the suppliers will
>>>>>> be active until the probe callback takes over and the rest depends on
>>>>>> that callback.
>>>>> The problem of this racing will finally let consumer is active but
>>>>> supplier is suspended.
>>>> So it would be better to send a bug report regarding this.
>>>>
>>>>> The link relation is broken.
>>>>> I know you may curious how it happened? right?
>>>>> Honestly, I am not sure, but I think the second racing point
>>>>> is rpm_get_suppliers and pm_runtime_put_suppliers(release rpm_active).
>>>> I'm not sure what you mean by "the racing point".
>>>>
>>>> Yes, these functions can run concurrently.
>>>>
>>>>> So, I try to fix the first racing point and the problem is gone.
>>>>> It is full meet expect, and the pm runtime will work smoothly after
>>>>> __driver_probe_device done.
>>>> I'm almost sure that there is at least one scenario that would be
>>>> broken by this change.
>>> That said, the code in there may be a bit overdesigned.
>>>
>>> Does the patch below help?
>>>
>>> ---
>>>    drivers/base/power/runtime.c |   14 +-------------
>>>    1 file changed, 1 insertion(+), 13 deletions(-)
>>>
>>> Index: linux-pm/drivers/base/power/runtime.c
>>> ===================================================================
>>> --- linux-pm.orig/drivers/base/power/runtime.c
>>> +++ linux-pm/drivers/base/power/runtime.c
>>> @@ -1768,7 +1768,6 @@ void pm_runtime_get_suppliers(struct dev
>>>                if (link->flags & DL_FLAG_PM_RUNTIME) {
>>>                        link->supplier_preactivated = true;
>>>                        pm_runtime_get_sync(link->supplier);
>>> -                     refcount_inc(&link->rpm_active);
>>>                }
>>>
>>>        device_links_read_unlock(idx);
>>> @@ -1788,19 +1787,8 @@ void pm_runtime_put_suppliers(struct dev
>>>        list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
>>>                                device_links_read_lock_held())
>>>                if (link->supplier_preactivated) {
>>> -                     bool put;
>>> -
>>>                        link->supplier_preactivated = false;
>>> -
>>> -                     spin_lock_irq(&dev->power.lock);
>>> -
>>> -                     put = pm_runtime_status_suspended(dev) &&
>>> -                           refcount_dec_not_one(&link->rpm_active);
>>> -
>>> -                     spin_unlock_irq(&dev->power.lock);
>>> -
>>> -                     if (put)
>>> -                             pm_runtime_put(link->supplier);
>>> +                     pm_runtime_put(link->supplier);
>>>                }
>>>
>>>        device_links_read_unlock(idx);
>>
>> Hi Rafael,
>>
>> I think this patch solve the rpm_active racing problem.
>> But it still have problem that
>> pm_runtime_get_suppliers call pm_runtime_get_sync(link->supplier)
>> and supplier could suspend immediately by other thread who call
>> pm_runtime_release_supplier.
> No, it won't, because pm_runtime_release_supplier() won't drop the
> reference on the supplier taken by pm_runtime_get_suppliers(0 after
> the patch.

Hi Rafael,

I think pm_runtime_release_supplier will always decrese the reference
rpm_active count to 1 and check idle will let supplier enter suspend. Am 
I wrong?
Could you explain why this patch won't drop the reference?

Thanks

Peter
Rafael J. Wysocki June 30, 2022, 4:28 p.m. UTC | #13
On Thu, Jun 30, 2022 at 5:19 PM Peter Wang <peter.wang@mediatek.com> wrote:
>
>
> On 6/30/22 10:47 PM, Rafael J. Wysocki wrote:
> > On Thu, Jun 30, 2022 at 4:26 PM Peter Wang <peter.wang@mediatek.com> wrote:
> >>
> >> On 6/30/22 12:01 AM, Rafael J. Wysocki wrote:
> >>> [Add CCs to linix-pm, LKML and Greg]
> >>>
> >>> On Wednesday, June 29, 2022 5:32:00 PM CEST Rafael J. Wysocki wrote:
> >>>> On Wed, Jun 29, 2022 at 4:47 PM Peter Wang <peter.wang@mediatek.com> wrote:
> >>>>> On 6/29/22 9:22 PM, Rafael J. Wysocki wrote:
> >>>>>> On Wed, Jun 29, 2022 at 5:02 AM Peter Wang <peter.wang@mediatek.com> wrote:
> >>>>>>> On 6/28/22 11:54 PM, Rafael J. Wysocki wrote:
> >>>>>>>> On Tue, Jun 28, 2022 at 3:53 AM Peter Wang <peter.wang@mediatek.com> wrote:
> >>>>>>>>> On 6/28/22 3:00 AM, Rafael J. Wysocki wrote:
> >>>>>>>>>> On Mon, Jun 13, 2022 at 2:08 PM <peter.wang@mediatek.com> wrote:
> >>>>>>>>>>> From: Peter Wang <peter.wang@mediatek.com>
> >>>>>>>>>>>
> >>>>>>>>>>> With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
> >>>>>>>>>>> to prevent supplier enter suspend, pm_runtime_release_supplier should
> >>>>>>>>>>> check supplier_preactivated before let supplier enter suspend.
> >>>>>>>>>> Why?
> >>>>>>>>> because supplier_preactivated is true means supplier cannot enter
> >>>>>>>>> suspend, right?
> >>>>>>>> No, it doesn't mean that.
> >>>>>>> Hi Rafael,
> >>>>>>>
> >>>>>>> if supplier_preactivated is true, means someone call
> >>>>>>> pm_runtime_get_suppliers and
> >>>>>>> before pm_runtime_put_suppliers right? This section suppliers should not
> >>>>>>> enter suspend.
> >>>>>> No, this is not how this is expected to work.
> >>>>>>
> >>>>>> First off, the only caller of pm_runtime_get_suppliers() and
> >>>>>> pm_runtime_put_suppliers() is __driver_probe_device().  Really nobody
> >>>>>> else has any business that would require calling them.
> >>>>> Hi Rafael,
> >>>>>
> >>>>> Yes, you are right!
> >>>>> __driver_probe_device the only one use and just because
> >>>>> __driver_probe_device use
> >>>>> pm_runtime_get_suppliers cause problem.
> >>>>>
> >>>>>
> >>>>>> Second, the role of pm_runtime_get_suppliers() is to "preactivate" the
> >>>>>> suppliers before running probe for a consumer device and the role of
> >>>>> the role of pm_runtime_get_suppliers() is to "preactivate" the suppliers,
> >>>>> but suppliers may suspend immediately after preactivate right?
> >>>>> Here is just this case. this is first racing point.
> >>>>> Thread A: pm_runtime_get_suppliers                -> __driver_probe_device
> >>>>> Thread B: pm_runtime_release_supplier
> >>>>> Thread A: Run with supplier not preactivate      -> __driver_probe_device
> >>>>>
> >>>>>> pm_runtime_put_suppliers() is to do the cleanup in case the device is
> >>>>>> left in suspend after probing.
> >>>>>>
> >>>>>> IOW, pm_runtime_get_suppliers() is to ensure that the suppliers will
> >>>>>> be active until the probe callback takes over and the rest depends on
> >>>>>> that callback.
> >>>>> The problem of this racing will finally let consumer is active but
> >>>>> supplier is suspended.
> >>>> So it would be better to send a bug report regarding this.
> >>>>
> >>>>> The link relation is broken.
> >>>>> I know you may curious how it happened? right?
> >>>>> Honestly, I am not sure, but I think the second racing point
> >>>>> is rpm_get_suppliers and pm_runtime_put_suppliers(release rpm_active).
> >>>> I'm not sure what you mean by "the racing point".
> >>>>
> >>>> Yes, these functions can run concurrently.
> >>>>
> >>>>> So, I try to fix the first racing point and the problem is gone.
> >>>>> It is full meet expect, and the pm runtime will work smoothly after
> >>>>> __driver_probe_device done.
> >>>> I'm almost sure that there is at least one scenario that would be
> >>>> broken by this change.
> >>> That said, the code in there may be a bit overdesigned.
> >>>
> >>> Does the patch below help?
> >>>
> >>> ---
> >>>    drivers/base/power/runtime.c |   14 +-------------
> >>>    1 file changed, 1 insertion(+), 13 deletions(-)
> >>>
> >>> Index: linux-pm/drivers/base/power/runtime.c
> >>> ===================================================================
> >>> --- linux-pm.orig/drivers/base/power/runtime.c
> >>> +++ linux-pm/drivers/base/power/runtime.c
> >>> @@ -1768,7 +1768,6 @@ void pm_runtime_get_suppliers(struct dev
> >>>                if (link->flags & DL_FLAG_PM_RUNTIME) {
> >>>                        link->supplier_preactivated = true;
> >>>                        pm_runtime_get_sync(link->supplier);
> >>> -                     refcount_inc(&link->rpm_active);
> >>>                }
> >>>
> >>>        device_links_read_unlock(idx);
> >>> @@ -1788,19 +1787,8 @@ void pm_runtime_put_suppliers(struct dev
> >>>        list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
> >>>                                device_links_read_lock_held())
> >>>                if (link->supplier_preactivated) {
> >>> -                     bool put;
> >>> -
> >>>                        link->supplier_preactivated = false;
> >>> -
> >>> -                     spin_lock_irq(&dev->power.lock);
> >>> -
> >>> -                     put = pm_runtime_status_suspended(dev) &&
> >>> -                           refcount_dec_not_one(&link->rpm_active);
> >>> -
> >>> -                     spin_unlock_irq(&dev->power.lock);
> >>> -
> >>> -                     if (put)
> >>> -                             pm_runtime_put(link->supplier);
> >>> +                     pm_runtime_put(link->supplier);
> >>>                }
> >>>
> >>>        device_links_read_unlock(idx);
> >>
> >> Hi Rafael,
> >>
> >> I think this patch solve the rpm_active racing problem.
> >> But it still have problem that
> >> pm_runtime_get_suppliers call pm_runtime_get_sync(link->supplier)
> >> and supplier could suspend immediately by other thread who call
> >> pm_runtime_release_supplier.
> > No, it won't, because pm_runtime_release_supplier() won't drop the
> > reference on the supplier taken by pm_runtime_get_suppliers(0 after
> > the patch.
>
> Hi Rafael,
>
> I think pm_runtime_release_supplier will always decrese the reference
> rpm_active count to 1 and check idle will let supplier enter suspend. Am
> I wrong?
>
> Could you explain why this patch won't drop the reference?

What matters is the supplier's PM-runtime usage counter and (with the
patch above applied) pm_runtime_get_suppliers() bumps it up via
pm_runtime_get_sync() and it doesn't bump up the device link's
rpm_active count at the same time.

This is important, because the number of times
pm_runtime_release_supplier() decrements the supplier's usage counter
is the same as the rpm_active count value at the beginning of that
function minus 1.  Now, rpm_active is 1 initially and every time it
gets incremented, the supplier's usage counter is also incremented.
Combined with the observation in the previous paragraph, this means
that after pm_runtime_get_suppliers() the value of the supplier's
PM-runtime usage counter will always be greater than the rpm_active
value minus 1, so pm_runtime_release_supplier() cannot decrement it
down to zero until pm_runtime_put_suppliers() runs.
Peter Wang (王信友) July 1, 2022, 10:21 a.m. UTC | #14
On 7/1/22 12:28 AM, Rafael J. Wysocki wrote:
> On Thu, Jun 30, 2022 at 5:19 PM Peter Wang <peter.wang@mediatek.com> wrote:
>>
>> On 6/30/22 10:47 PM, Rafael J. Wysocki wrote:
>>> On Thu, Jun 30, 2022 at 4:26 PM Peter Wang <peter.wang@mediatek.com> wrote:
>>>> On 6/30/22 12:01 AM, Rafael J. Wysocki wrote:
>>>>> [Add CCs to linix-pm, LKML and Greg]
>>>>>
>>>>> On Wednesday, June 29, 2022 5:32:00 PM CEST Rafael J. Wysocki wrote:
>>>>>> On Wed, Jun 29, 2022 at 4:47 PM Peter Wang <peter.wang@mediatek.com> wrote:
>>>>>>> On 6/29/22 9:22 PM, Rafael J. Wysocki wrote:
>>>>>>>> On Wed, Jun 29, 2022 at 5:02 AM Peter Wang <peter.wang@mediatek.com> wrote:
>>>>>>>>> On 6/28/22 11:54 PM, Rafael J. Wysocki wrote:
>>>>>>>>>> On Tue, Jun 28, 2022 at 3:53 AM Peter Wang <peter.wang@mediatek.com> wrote:
>>>>>>>>>>> On 6/28/22 3:00 AM, Rafael J. Wysocki wrote:
>>>>>>>>>>>> On Mon, Jun 13, 2022 at 2:08 PM <peter.wang@mediatek.com> wrote:
>>>>>>>>>>>>> From: Peter Wang <peter.wang@mediatek.com>
>>>>>>>>>>>>>
>>>>>>>>>>>>> With divice link of DL_FLAG_PM_RUNTIME, if consumer call pm_runtime_get_suppliers
>>>>>>>>>>>>> to prevent supplier enter suspend, pm_runtime_release_supplier should
>>>>>>>>>>>>> check supplier_preactivated before let supplier enter suspend.
>>>>>>>>>>>> Why?
>>>>>>>>>>> because supplier_preactivated is true means supplier cannot enter
>>>>>>>>>>> suspend, right?
>>>>>>>>>> No, it doesn't mean that.
>>>>>>>>> Hi Rafael,
>>>>>>>>>
>>>>>>>>> if supplier_preactivated is true, means someone call
>>>>>>>>> pm_runtime_get_suppliers and
>>>>>>>>> before pm_runtime_put_suppliers right? This section suppliers should not
>>>>>>>>> enter suspend.
>>>>>>>> No, this is not how this is expected to work.
>>>>>>>>
>>>>>>>> First off, the only caller of pm_runtime_get_suppliers() and
>>>>>>>> pm_runtime_put_suppliers() is __driver_probe_device().  Really nobody
>>>>>>>> else has any business that would require calling them.
>>>>>>> Hi Rafael,
>>>>>>>
>>>>>>> Yes, you are right!
>>>>>>> __driver_probe_device the only one use and just because
>>>>>>> __driver_probe_device use
>>>>>>> pm_runtime_get_suppliers cause problem.
>>>>>>>
>>>>>>>
>>>>>>>> Second, the role of pm_runtime_get_suppliers() is to "preactivate" the
>>>>>>>> suppliers before running probe for a consumer device and the role of
>>>>>>> the role of pm_runtime_get_suppliers() is to "preactivate" the suppliers,
>>>>>>> but suppliers may suspend immediately after preactivate right?
>>>>>>> Here is just this case. this is first racing point.
>>>>>>> Thread A: pm_runtime_get_suppliers                -> __driver_probe_device
>>>>>>> Thread B: pm_runtime_release_supplier
>>>>>>> Thread A: Run with supplier not preactivate      -> __driver_probe_device
>>>>>>>
>>>>>>>> pm_runtime_put_suppliers() is to do the cleanup in case the device is
>>>>>>>> left in suspend after probing.
>>>>>>>>
>>>>>>>> IOW, pm_runtime_get_suppliers() is to ensure that the suppliers will
>>>>>>>> be active until the probe callback takes over and the rest depends on
>>>>>>>> that callback.
>>>>>>> The problem of this racing will finally let consumer is active but
>>>>>>> supplier is suspended.
>>>>>> So it would be better to send a bug report regarding this.
>>>>>>
>>>>>>> The link relation is broken.
>>>>>>> I know you may curious how it happened? right?
>>>>>>> Honestly, I am not sure, but I think the second racing point
>>>>>>> is rpm_get_suppliers and pm_runtime_put_suppliers(release rpm_active).
>>>>>> I'm not sure what you mean by "the racing point".
>>>>>>
>>>>>> Yes, these functions can run concurrently.
>>>>>>
>>>>>>> So, I try to fix the first racing point and the problem is gone.
>>>>>>> It is full meet expect, and the pm runtime will work smoothly after
>>>>>>> __driver_probe_device done.
>>>>>> I'm almost sure that there is at least one scenario that would be
>>>>>> broken by this change.
>>>>> That said, the code in there may be a bit overdesigned.
>>>>>
>>>>> Does the patch below help?
>>>>>
>>>>> ---
>>>>>     drivers/base/power/runtime.c |   14 +-------------
>>>>>     1 file changed, 1 insertion(+), 13 deletions(-)
>>>>>
>>>>> Index: linux-pm/drivers/base/power/runtime.c
>>>>> ===================================================================
>>>>> --- linux-pm.orig/drivers/base/power/runtime.c
>>>>> +++ linux-pm/drivers/base/power/runtime.c
>>>>> @@ -1768,7 +1768,6 @@ void pm_runtime_get_suppliers(struct dev
>>>>>                 if (link->flags & DL_FLAG_PM_RUNTIME) {
>>>>>                         link->supplier_preactivated = true;
>>>>>                         pm_runtime_get_sync(link->supplier);
>>>>> -                     refcount_inc(&link->rpm_active);
>>>>>                 }
>>>>>
>>>>>         device_links_read_unlock(idx);
>>>>> @@ -1788,19 +1787,8 @@ void pm_runtime_put_suppliers(struct dev
>>>>>         list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
>>>>>                                 device_links_read_lock_held())
>>>>>                 if (link->supplier_preactivated) {
>>>>> -                     bool put;
>>>>> -
>>>>>                         link->supplier_preactivated = false;
>>>>> -
>>>>> -                     spin_lock_irq(&dev->power.lock);
>>>>> -
>>>>> -                     put = pm_runtime_status_suspended(dev) &&
>>>>> -                           refcount_dec_not_one(&link->rpm_active);
>>>>> -
>>>>> -                     spin_unlock_irq(&dev->power.lock);
>>>>> -
>>>>> -                     if (put)
>>>>> -                             pm_runtime_put(link->supplier);
>>>>> +                     pm_runtime_put(link->supplier);
>>>>>                 }
>>>>>
>>>>>         device_links_read_unlock(idx);
>>>> Hi Rafael,
>>>>
>>>> I think this patch solve the rpm_active racing problem.
>>>> But it still have problem that
>>>> pm_runtime_get_suppliers call pm_runtime_get_sync(link->supplier)
>>>> and supplier could suspend immediately by other thread who call
>>>> pm_runtime_release_supplier.
>>> No, it won't, because pm_runtime_release_supplier() won't drop the
>>> reference on the supplier taken by pm_runtime_get_suppliers(0 after
>>> the patch.
>> Hi Rafael,
>>
>> I think pm_runtime_release_supplier will always decrese the reference
>> rpm_active count to 1 and check idle will let supplier enter suspend. Am
>> I wrong?
>>
>> Could you explain why this patch won't drop the reference?
> What matters is the supplier's PM-runtime usage counter and (with the
> patch above applied) pm_runtime_get_suppliers() bumps it up via
> pm_runtime_get_sync() and it doesn't bump up the device link's
> rpm_active count at the same time.
>
> This is important, because the number of times
> pm_runtime_release_supplier() decrements the supplier's usage counter
> is the same as the rpm_active count value at the beginning of that
> function minus 1.  Now, rpm_active is 1 initially and every time it
> gets incremented, the supplier's usage counter is also incremented.
> Combined with the observation in the previous paragraph, this means
> that after pm_runtime_get_suppliers() the value of the supplier's
> PM-runtime usage counter will always be greater than the rpm_active
> value minus 1, so pm_runtime_release_supplier() cannot decrement it
> down to zero until pm_runtime_put_suppliers() runs.

Hi Rafael,

Yes, it is very clear!
I miss this important key point that usage_count is always > rpm_active 1.
I think this patch could work.

Thanks.
Peter
Peter Wang (王信友) Aug. 2, 2022, 3:19 a.m. UTC | #15
> Hi Rafael,
>
> Yes, it is very clear!
> I miss this important key point that usage_count is always > 
> rpm_active 1.
> I think this patch could work.
>
> Thanks.
> Peter
>
>
>
>
Hi Rafael,

After test with commit ("887371066039011144b4a94af97d9328df6869a2 PM: 
runtime: Fix supplier device management during consumer probe") past weeks,
The supplier still suspend when consumer is active "after" 
pm_runtime_put_suppliers.
Do you have any idea about that?

Thanks.
Peter
Rafael J. Wysocki Aug. 2, 2022, 11:01 a.m. UTC | #16
On Tue, Aug 2, 2022 at 5:19 AM Peter Wang <peter.wang@mediatek.com> wrote:
>
>
> > Hi Rafael,
> >
> > Yes, it is very clear!
> > I miss this important key point that usage_count is always >
> > rpm_active 1.
> > I think this patch could work.
> >
> > Thanks.
> > Peter
> >
> >
> >
> >
> Hi Rafael,
>
> After test with commit ("887371066039011144b4a94af97d9328df6869a2 PM:
> runtime: Fix supplier device management during consumer probe") past weeks,
> The supplier still suspend when consumer is active "after"
> pm_runtime_put_suppliers.
> Do you have any idea about that?

Well, this means that the consumer probe doesn't bump up the
supplier's PM-runtime usage counter as appropriate.

You need to tell me more about what happens during the consumer probe.
Which driver is this?
Peter Wang (王信友) Aug. 2, 2022, 1:33 p.m. UTC | #17
On 8/2/22 7:01 PM, Rafael J. Wysocki wrote:
> On Tue, Aug 2, 2022 at 5:19 AM Peter Wang <peter.wang@mediatek.com> wrote:
>>
>>> Hi Rafael,
>>>
>>> Yes, it is very clear!
>>> I miss this important key point that usage_count is always >
>>> rpm_active 1.
>>> I think this patch could work.
>>>
>>> Thanks.
>>> Peter
>>>
>>>
>>>
>>>
>> Hi Rafael,
>>
>> After test with commit ("887371066039011144b4a94af97d9328df6869a2 PM:
>> runtime: Fix supplier device management during consumer probe") past weeks,
>> The supplier still suspend when consumer is active "after"
>> pm_runtime_put_suppliers.
>> Do you have any idea about that?
> Well, this means that the consumer probe doesn't bump up the
> supplier's PM-runtime usage counter as appropriate.
>
> You need to tell me more about what happens during the consumer probe.
> Which driver is this?

Hi Rafael,

I have the same idea with you. But I still don't know how it could happen.

It is upstream ufs driver in scsi system. Here is call flow
do_scan_async (process 1)
     do_scsi_scan_host
         scsi_scan_host_selected
             scsi_scan_channel
                 __scsi_scan_target
                     scsi_probe_and_add_lun
                         scsi_alloc_sdev
                             slave_alloc     -> setup link
                         scsi_add_lun
                             slave_configure    -> enable rpm
                             scsi_sysfs_add_sdev
                                 scsi_autopm_get_device    <- get runtime pm
                                 device_add                <- invoke 
sd_probe in process 2
                                 scsi_autopm_put_device    <- put 
runtime pm, point 1

driver_probe_device (process 2)
     __driver_probe_device
         pm_runtime_get_suppliers
             really_probe
                 sd_probe
                     scsi_autopm_get_device                <- get 
runtime pm, point 2
                     pm_runtime_set_autosuspend_delay    <- set rpm 
delay to 2s
                     scsi_autopm_put_device                <- put 
runtime pm
         pm_runtime_put_suppliers                        <- 
(link->rpm_active = 1)

After process 1 call scsi_autopm_put_device(point 1) let consumer enter 
suspend,
process 2 call scsi_autopm_get_device(point 2) may have chance resume 
consumer but not
bump up the supplier's PM-runtime usage counter as appropriate.

Thanks.
Peter
Nitin Rawat Oct. 12, 2022, 10:31 a.m. UTC | #18
Hi Peter/Rafael,
We are also observed similiar issue on our platform. Looks like there is 
a race condition(explained below) which cause consumer to resume w/o 
bumping up the supplier's PM-runtime usage counter.

Process 1 (ufshcd_async_scan context)
ufshcd_async_scan()
     scsi_probe_and_add_lun
         scsi_add_lun
             slave_configure    -> enable rpm
                 scsi_sysfs_add_sdev
                     scsi_autopm_get_device
                         device_add     <- invoked sd_probe in process 2
                             scsi_autopm_put_device

Process 2 (sd_probe context)
driver_probe_device
__device_attach_async_helper
     __device_attach_driver
         driver_probe_device
             __driver_probe_device
                 sd_probe
                     scsi_autopm_get_device



Race condition for dev->power.runtime_status for consumer dev 0:0:0:0 
can happen as below in rpm framework

ufshcd_async_scan context (process 1)
scsi_autopm_put_device() //0:0:0:0
	pm_runtime_put_sync()
	__pm_runtime_idle()
	rpm_idle()
	__rpm_callback()
		scsi_runtime_idle()
			pm_runtime_mark_last_busy()
			pm_runtime_autosuspend()
				__pm_runtime_suspend(RPM_AUTO)
				rpm_suspend(RPM_AUTO)
					status = RPM_SUSPENDING
					scsi_runtime_suspend()
						__rpm_callback()
					status = RPM_SUSPENDED------>1
					rpm_suspend_suppliers()
			return -EBUSY

		(use_links)&&(dev->power.runtime_status == RPM_RESUMING && 
retval)------->3
		__rpm_put_suppliers()





sd_probe context (Process 2)
scsi_autopm_get_device() //0:0:0:0
     __pm_runtime_resume(RPM_GET_PUT)
     rpm_resume
      	status = RPM_RESUMING----->2



After power.runtime_status of consumer 0:0:0:0 was changed to 
RPM_SUSPENDED and before scsi_runtime_idle retval was -16(EBUSY) to 
__rpm_callback, power.runtime_status of consumer 0:0:0:0 was changed to 
RPM_RESUMING and hence condition 3 became true and __rpm_put_suppliers 
was called and hence consumer resumed with decremented usage_count due 
to this race condition.

Please let me know your thoughts on this.

Regards,
Nitin

On 8/2/2022 7:03 PM, Peter Wang wrote:
> 
> On 8/2/22 7:01 PM, Rafael J. Wysocki wrote:
>> On Tue, Aug 2, 2022 at 5:19 AM Peter Wang <peter.wang@mediatek.com> 
>> wrote:
>>>
>>>> Hi Rafael,
>>>>
>>>> Yes, it is very clear!
>>>> I miss this important key point that usage_count is always >
>>>> rpm_active 1.
>>>> I think this patch could work.
>>>>
>>>> Thanks.
>>>> Peter
>>>>
>>>>
>>>>
>>>>
>>> Hi Rafael,
>>>
>>> After test with commit ("887371066039011144b4a94af97d9328df6869a2 PM:
>>> runtime: Fix supplier device management during consumer probe") past 
>>> weeks,
>>> The supplier still suspend when consumer is active "after"
>>> pm_runtime_put_suppliers.
>>> Do you have any idea about that?
>> Well, this means that the consumer probe doesn't bump up the
>> supplier's PM-runtime usage counter as appropriate.
>>
>> You need to tell me more about what happens during the consumer probe.
>> Which driver is this?
> 
> Hi Rafael,
> 
> I have the same idea with you. But I still don't know how it could happen.
> 
> It is upstream ufs driver in scsi system. Here is call flow
> do_scan_async (process 1)
>      do_scsi_scan_host
>          scsi_scan_host_selected
>              scsi_scan_channel
>                  __scsi_scan_target
>                      scsi_probe_and_add_lun
>                          scsi_alloc_sdev
>                              slave_alloc     -> setup link
>                          scsi_add_lun
>                              slave_configure    -> enable rpm
>                              scsi_sysfs_add_sdev
>                                  scsi_autopm_get_device    <- get 
> runtime pm
>                                  device_add                <- invoke 
> sd_probe in process 2
>                                  scsi_autopm_put_device    <- put 
> runtime pm, point 1
> 
> driver_probe_device (process 2)
>      __driver_probe_device
>          pm_runtime_get_suppliers
>              really_probe
>                  sd_probe
>                      scsi_autopm_get_device                <- get 
> runtime pm, point 2
>                      pm_runtime_set_autosuspend_delay    <- set rpm 
> delay to 2s
>                      scsi_autopm_put_device                <- put 
> runtime pm
>          pm_runtime_put_suppliers                        <- 
> (link->rpm_active = 1)
> 
> After process 1 call scsi_autopm_put_device(point 1) let consumer enter 
> suspend,
> process 2 call scsi_autopm_get_device(point 2) may have chance resume 
> consumer but not
> bump up the supplier's PM-runtime usage counter as appropriate.
> 
> Thanks.
> Peter
> 
> 
> 
> 
> 
> 
>
diff mbox series

Patch

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 7cd789c4985d..3b9cc559928f 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -486,7 +486,7 @@  static void device_link_release_fn(struct work_struct *work)
 	/* Ensure that all references to the link object have been dropped. */
 	device_link_synchronize_removal();
 
-	pm_runtime_release_supplier(link, true);
+	pm_runtime_release_supplier(link, true, true);
 
 	put_device(link->consumer);
 	put_device(link->supplier);
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 676dc72d912d..3c4f425937a1 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -314,10 +314,19 @@  static int rpm_get_suppliers(struct device *dev)
  * and if @check_idle is set, check if that device is idle (and so it can be
  * suspended).
  */
-void pm_runtime_release_supplier(struct device_link *link, bool check_idle)
+void pm_runtime_release_supplier(struct device_link *link, bool check_idle,
+	bool drop)
 {
 	struct device *supplier = link->supplier;
 
+	/*
+	 * When consumer hold supplier, supplier cannot enter suspend.
+	 * Driect release supplier and let supplier enter suspend is not allow.
+	 * Unless the link is drop, direct relsease supplier should be okay.
+	 */
+	if (link->supplier_preactivated && !drop)
+		return;
+
 	/*
 	 * The additional power.usage_count check is a safety net in case
 	 * the rpm_active refcount becomes saturated, in which case
@@ -338,7 +347,7 @@  static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend)
 
 	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
 				device_links_read_lock_held())
-		pm_runtime_release_supplier(link, try_to_suspend);
+		pm_runtime_release_supplier(link, try_to_suspend, false);
 }
 
 static void rpm_put_suppliers(struct device *dev)
@@ -1838,7 +1847,7 @@  void pm_runtime_drop_link(struct device_link *link)
 		return;
 
 	pm_runtime_drop_link_count(link->consumer);
-	pm_runtime_release_supplier(link, true);
+	pm_runtime_release_supplier(link, true, true);
 }
 
 static bool pm_runtime_need_not_resume(struct device *dev)
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 9e4d056967c6..354ffb1eaec0 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -88,7 +88,8 @@  extern void pm_runtime_get_suppliers(struct device *dev);
 extern void pm_runtime_put_suppliers(struct device *dev);
 extern void pm_runtime_new_link(struct device *dev);
 extern void pm_runtime_drop_link(struct device_link *link);
-extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle);
+extern void pm_runtime_release_supplier(struct device_link *link,
+	bool check_idle, bool drop);
 
 extern int devm_pm_runtime_enable(struct device *dev);
 
@@ -315,7 +316,7 @@  static inline void pm_runtime_put_suppliers(struct device *dev) {}
 static inline void pm_runtime_new_link(struct device *dev) {}
 static inline void pm_runtime_drop_link(struct device_link *link) {}
 static inline void pm_runtime_release_supplier(struct device_link *link,
-					       bool check_idle) {}
+					       bool check_idle, bool drop) {}
 
 #endif /* !CONFIG_PM */