diff mbox

PM: Enable asynchronous noirq resume threads to save the resuming time

Message ID 1389683888.3650.78.camel@cliu38-desktop-build (mailing list archive)
State RFC, archived
Headers show

Commit Message

Chuansheng Liu Jan. 14, 2014, 7:18 a.m. UTC
Currently, the dpm_resume_noirq() is done synchronously, and for PCI devices
pci_pm_resume_noirq():

pci_pm_resume_noirq()
 pci_pm_default_resume_early()
  pci_power_up()
   pci_raw_set_power_state()
Which set the device from D3hot to D0 mostly, for every device, there will
be one 10ms(pci_pm_d3_delay) to wait.

Hence normally dpm_resume_noirq() will cost > 100ms, which is bigger for mobile
platform.

Here implementing it with asynchronous way which will reduce much.

For example below, The 80% time is saved.
With synchronous way:
[ 1411.272218] PM: noirq resume of devices complete after 92.223 msecs
With asynchronous way:
[  110.616735] PM: noirq resume of devices complete after 10.544 msecs

Signed-off-by: Liu, Chuansheng <chuansheng.liu@intel.com>
---
 drivers/base/power/main.c |   42 +++++++++++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 9 deletions(-)

Comments

Rafael J. Wysocki Jan. 14, 2014, 10:54 p.m. UTC | #1
On Tuesday, January 14, 2014 03:18:08 PM Chuansheng Liu wrote:
> 
> Currently, the dpm_resume_noirq() is done synchronously, and for PCI devices
> pci_pm_resume_noirq():
> 
> pci_pm_resume_noirq()
>  pci_pm_default_resume_early()
>   pci_power_up()
>    pci_raw_set_power_state()
> Which set the device from D3hot to D0 mostly, for every device, there will
> be one 10ms(pci_pm_d3_delay) to wait.
> 
> Hence normally dpm_resume_noirq() will cost > 100ms, which is bigger for mobile
> platform.

pci_pm_d3_delay usually is not 10 ms.  What is 10 ms is dev->d3_delay, but
that also is not on every platform now.

That said the approach here makes sense, but I have two questions:

1. On what systems has it been tested?
2. What about suspend_late/resume_early?  If the _noirq things are to be executed
   asynchronously, those should be too.

Thanks!

> Here implementing it with asynchronous way which will reduce much.
> 
> For example below, The 80% time is saved.
> With synchronous way:
> [ 1411.272218] PM: noirq resume of devices complete after 92.223 msecs
> With asynchronous way:
> [  110.616735] PM: noirq resume of devices complete after 10.544 msecs
> 
> Signed-off-by: Liu, Chuansheng <chuansheng.liu@intel.com>
> ---
>  drivers/base/power/main.c |   42 +++++++++++++++++++++++++++++++++---------
>  1 file changed, 33 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
> index 1b41fca..1b9d774 100644
> --- a/drivers/base/power/main.c
> +++ b/drivers/base/power/main.c
> @@ -505,6 +505,19 @@ static int device_resume_noirq(struct device *dev, pm_message_t state)
>  	return error;
>  }
>  
> +static bool is_async(struct device *dev);
> +
> +static void async_resume_noirq(void *data, async_cookie_t cookie)
> +{
> +	struct device *dev = (struct device *)data;
> +	int error;
> +
> +	error = device_resume_noirq(dev, pm_transition);
> +	if (error)
> +		pm_dev_err(dev, pm_transition, " noirq", error);
> +	put_device(dev);
> +}
> +
>  /**
>   * dpm_resume_noirq - Execute "noirq resume" callbacks for all devices.
>   * @state: PM transition of the system being carried out.
> @@ -514,29 +527,40 @@ static int device_resume_noirq(struct device *dev, pm_message_t state)
>   */
>  static void dpm_resume_noirq(pm_message_t state)
>  {
> +	struct device *dev;
>  	ktime_t starttime = ktime_get();
> +	pm_transition = state;
> +
> +	list_for_each_entry(dev, &dpm_noirq_list, power.entry) {
> +		if (is_async(dev)) {
> +			get_device(dev);
> +			async_schedule(async_resume_noirq, dev);
> +		}
> +	}
>  
>  	mutex_lock(&dpm_list_mtx);
>  	while (!list_empty(&dpm_noirq_list)) {
> -		struct device *dev = to_device(dpm_noirq_list.next);
> -		int error;
> +		dev = to_device(dpm_noirq_list.next);
>  
>  		get_device(dev);
>  		list_move_tail(&dev->power.entry, &dpm_late_early_list);
>  		mutex_unlock(&dpm_list_mtx);
>  
> -		error = device_resume_noirq(dev, state);
> -		if (error) {
> -			suspend_stats.failed_resume_noirq++;
> -			dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
> -			dpm_save_failed_dev(dev_name(dev));
> -			pm_dev_err(dev, state, " noirq", error);
> +		if (!is_async(dev)) {
> +			int error;
> +			error = device_resume_noirq(dev, state);
> +			if (error) {
> +				suspend_stats.failed_resume_noirq++;
> +				dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
> +				dpm_save_failed_dev(dev_name(dev));
> +				pm_dev_err(dev, state, " noirq", error);
> +			}
>  		}
> -
>  		mutex_lock(&dpm_list_mtx);
>  		put_device(dev);
>  	}
>  	mutex_unlock(&dpm_list_mtx);
> +	async_synchronize_full();
>  	dpm_show_time(starttime, state, "noirq");
>  	resume_device_irqs();
>  	cpuidle_resume();
>
Chuansheng Liu Jan. 15, 2014, 12:35 a.m. UTC | #2
SGVsbG8gUmFmYWVsLA0KDQo+IC0tLS0tT3JpZ2luYWwgTWVzc2FnZS0tLS0tDQo+IEZyb206IFJh
ZmFlbCBKLiBXeXNvY2tpIFttYWlsdG86cmp3QHJqd3lzb2NraS5uZXRdDQo+IFNlbnQ6IFdlZG5l
c2RheSwgSmFudWFyeSAxNSwgMjAxNCA2OjU1IEFNDQo+IFRvOiBMaXUsIENodWFuc2hlbmcNCj4g
Q2M6IGdyZWdraEBsaW51eGZvdW5kYXRpb24ub3JnOyBCcm93biwgTGVuOyBwYXZlbEB1Y3cuY3o7
DQo+IGxpbnV4LXBtQHZnZXIua2VybmVsLm9yZzsgbGludXgta2VybmVsQHZnZXIua2VybmVsLm9y
ZzsgTGksIFpodWFuZ3poaQ0KPiBTdWJqZWN0OiBSZTogW1BBVENIXSBQTTogRW5hYmxlIGFzeW5j
aHJvbm91cyBub2lycSByZXN1bWUgdGhyZWFkcyB0byBzYXZlDQo+IHRoZSByZXN1bWluZyB0aW1l
DQo+IA0KPiBPbiBUdWVzZGF5LCBKYW51YXJ5IDE0LCAyMDE0IDAzOjE4OjA4IFBNIENodWFuc2hl
bmcgTGl1IHdyb3RlOg0KPiA+DQo+ID4gQ3VycmVudGx5LCB0aGUgZHBtX3Jlc3VtZV9ub2lycSgp
IGlzIGRvbmUgc3luY2hyb25vdXNseSwgYW5kIGZvciBQQ0kgZGV2aWNlcw0KPiA+IHBjaV9wbV9y
ZXN1bWVfbm9pcnEoKToNCj4gPg0KPiA+IHBjaV9wbV9yZXN1bWVfbm9pcnEoKQ0KPiA+ICBwY2lf
cG1fZGVmYXVsdF9yZXN1bWVfZWFybHkoKQ0KPiA+ICAgcGNpX3Bvd2VyX3VwKCkNCj4gPiAgICBw
Y2lfcmF3X3NldF9wb3dlcl9zdGF0ZSgpDQo+ID4gV2hpY2ggc2V0IHRoZSBkZXZpY2UgZnJvbSBE
M2hvdCB0byBEMCBtb3N0bHksIGZvciBldmVyeSBkZXZpY2UsIHRoZXJlIHdpbGwNCj4gPiBiZSBv
bmUgMTBtcyhwY2lfcG1fZDNfZGVsYXkpIHRvIHdhaXQuDQo+ID4NCj4gPiBIZW5jZSBub3JtYWxs
eSBkcG1fcmVzdW1lX25vaXJxKCkgd2lsbCBjb3N0ID4gMTAwbXMsIHdoaWNoIGlzIGJpZ2dlciBm
b3INCj4gbW9iaWxlDQo+ID4gcGxhdGZvcm0uDQo+IA0KPiBwY2lfcG1fZDNfZGVsYXkgdXN1YWxs
eSBpcyBub3QgMTAgbXMuICBXaGF0IGlzIDEwIG1zIGlzIGRldi0+ZDNfZGVsYXksIGJ1dA0KPiB0
aGF0IGFsc28gaXMgbm90IG9uIGV2ZXJ5IHBsYXRmb3JtIG5vdy4NClllcywgaXQgaXMgZDNfZGVs
YXkgZXhhY3RseSwgdGhhbmtzIHlvdXIgY29ycmVjdGlvbi4NCg0KPiANCj4gVGhhdCBzYWlkIHRo
ZSBhcHByb2FjaCBoZXJlIG1ha2VzIHNlbnNlLCBidXQgSSBoYXZlIHR3byBxdWVzdGlvbnM6DQo+
IA0KPiAxLiBPbiB3aGF0IHN5c3RlbXMgaGFzIGl0IGJlZW4gdGVzdGVkPw0KSSBoYXZlIGJlZW4g
dGVzdGVkIG9uIEJheXRyYWlsIHBsYXRmb3JtLCBhbmQgYXQgdGhlIGJlZ2lubmluZyB0aGUgZDNf
ZGVsYXkgaXMgc2V0IHRvIDAsDQpidXQgd2UgcmVhbGx5IG1lZXQgdGhlIEhBTkcgaXNzdWUgaWYg
d2Ugb3BlcmF0ZSB0aGUgZGV2aWNlIGltbWVkaWF0ZWx5IHNvbWV0aW1lcywNCnNvIGN1cnJlbnRs
eSBtb3N0IFBDSSBkZXZpY2VzIGluIEJheXRyYWlsIGhhcyB0aGUgMTBtcyBkZWxheS4NCkFuZCBp
dCBjYXVzZWQgdGhlIHdob2xlIHJlc3VtZSB0aW1lIGJpZ2dlciB0aGFuIENsb3ZlcnRyYWlsIHBs
YXRmb3JtOw0KDQo+IDIuIFdoYXQgYWJvdXQgc3VzcGVuZF9sYXRlL3Jlc3VtZV9lYXJseT8gIElm
IHRoZSBfbm9pcnEgdGhpbmdzIGFyZSB0byBiZQ0KPiBleGVjdXRlZCBhc3luY2hyb25vdXNseSwg
dGhvc2Ugc2hvdWxkIGJlIHRvby4NCkkgbm90aWNlZCBpdCBqdXN0IHRha2VuIGxpdHRsZSB0aW1l
IG9mdGVuLCBhbmQgSSBjYW5ub3QgZmluZCBvdXQgc29tZSByZWFzb25zIGZvciBjaGFuZ2luZyB0
aGVtDQppbiB0aGVvcnksIHNvIGlmIHlvdSBsaWtlIGl0LCBJIHdpbGwgdXBkYXRlIHRoZW0gaW50
byBwYXRjaCBWMiBhbHNvOikNCg0K
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rafael J. Wysocki Jan. 15, 2014, 1:28 p.m. UTC | #3
On Wednesday, January 15, 2014 12:35:16 AM Liu, Chuansheng wrote:
> Hello Rafael,
> 
> > -----Original Message-----
> > From: Rafael J. Wysocki [mailto:rjw@rjwysocki.net]
> > Sent: Wednesday, January 15, 2014 6:55 AM
> > To: Liu, Chuansheng
> > Cc: gregkh@linuxfoundation.org; Brown, Len; pavel@ucw.cz;
> > linux-pm@vger.kernel.org; linux-kernel@vger.kernel.org; Li, Zhuangzhi
> > Subject: Re: [PATCH] PM: Enable asynchronous noirq resume threads to save
> > the resuming time
> > 
> > On Tuesday, January 14, 2014 03:18:08 PM Chuansheng Liu wrote:
> > >
> > > Currently, the dpm_resume_noirq() is done synchronously, and for PCI devices
> > > pci_pm_resume_noirq():
> > >
> > > pci_pm_resume_noirq()
> > >  pci_pm_default_resume_early()
> > >   pci_power_up()
> > >    pci_raw_set_power_state()
> > > Which set the device from D3hot to D0 mostly, for every device, there will
> > > be one 10ms(pci_pm_d3_delay) to wait.
> > >
> > > Hence normally dpm_resume_noirq() will cost > 100ms, which is bigger for
> > mobile
> > > platform.
> > 
> > pci_pm_d3_delay usually is not 10 ms.  What is 10 ms is dev->d3_delay, but
> > that also is not on every platform now.
> Yes, it is d3_delay exactly, thanks your correction.
> 
> > 
> > That said the approach here makes sense, but I have two questions:
> > 
> > 1. On what systems has it been tested?
> I have been tested on Baytrail platform, and at the beginning the d3_delay is set to 0,
> but we really meet the HANG issue if we operate the device immediately sometimes,
> so currently most PCI devices in Baytrail has the 10ms delay.
> And it caused the whole resume time bigger than Clovertrail platform;

Does setting d3_delay to something smaller that 10 ms work on BayTrail?

You can try to set 5 ms and then 2.5 ms if that works and so on.  That still
would be an improvement from the 10 ms.

> > 2. What about suspend_late/resume_early?  If the _noirq things are to be
> > executed asynchronously, those should be too.
> I noticed it just taken little time often, and I cannot find out some reasons for changing them
> in theory, so if you like it, I will update them into patch V2 also:)

Yes, please do that.

Thanks!
Chuansheng Liu Jan. 16, 2014, 12:50 a.m. UTC | #4
> -----Original Message-----

> From: Rafael J. Wysocki [mailto:rjw@rjwysocki.net]

> Sent: Wednesday, January 15, 2014 9:28 PM

> To: Liu, Chuansheng

> Cc: gregkh@linuxfoundation.org; Brown, Len; pavel@ucw.cz;

> linux-pm@vger.kernel.org; linux-kernel@vger.kernel.org; Li, Zhuangzhi

> Subject: Re: [PATCH] PM: Enable asynchronous noirq resume threads to save

> the resuming time

> 

> On Wednesday, January 15, 2014 12:35:16 AM Liu, Chuansheng wrote:

> > Hello Rafael,

> >

> > > -----Original Message-----

> > > From: Rafael J. Wysocki [mailto:rjw@rjwysocki.net]

> > > Sent: Wednesday, January 15, 2014 6:55 AM

> > > To: Liu, Chuansheng

> > > Cc: gregkh@linuxfoundation.org; Brown, Len; pavel@ucw.cz;

> > > linux-pm@vger.kernel.org; linux-kernel@vger.kernel.org; Li, Zhuangzhi

> > > Subject: Re: [PATCH] PM: Enable asynchronous noirq resume threads to

> save

> > > the resuming time

> > >

> > > On Tuesday, January 14, 2014 03:18:08 PM Chuansheng Liu wrote:

> > > >

> > > > Currently, the dpm_resume_noirq() is done synchronously, and for PCI

> devices

> > > > pci_pm_resume_noirq():

> > > >

> > > > pci_pm_resume_noirq()

> > > >  pci_pm_default_resume_early()

> > > >   pci_power_up()

> > > >    pci_raw_set_power_state()

> > > > Which set the device from D3hot to D0 mostly, for every device, there will

> > > > be one 10ms(pci_pm_d3_delay) to wait.

> > > >

> > > > Hence normally dpm_resume_noirq() will cost > 100ms, which is bigger

> for

> > > mobile

> > > > platform.

> > >

> > > pci_pm_d3_delay usually is not 10 ms.  What is 10 ms is dev->d3_delay,

> but

> > > that also is not on every platform now.

> > Yes, it is d3_delay exactly, thanks your correction.

> >

> > >

> > > That said the approach here makes sense, but I have two questions:

> > >

> > > 1. On what systems has it been tested?

> > I have been tested on Baytrail platform, and at the beginning the d3_delay is

> set to 0,

> > but we really meet the HANG issue if we operate the device immediately

> sometimes,

> > so currently most PCI devices in Baytrail has the 10ms delay.

> > And it caused the whole resume time bigger than Clovertrail platform;

> 

> Does setting d3_delay to something smaller that 10 ms work on BayTrail?

> 

> You can try to set 5 ms and then 2.5 ms if that works and so on.  That still

> would be an improvement from the 10 ms.

Yes, for most devices in Baytrail platform, we already set the default value to 3ms, it works well now,
but for rare device, we set the d3_delay to 10ms.
So with this patch, the noirq execution time is 10ms;

> 

> > > 2. What about suspend_late/resume_early?  If the _noirq things are to be

> > > executed asynchronously, those should be too.

> > I noticed it just taken little time often, and I cannot find out some reasons for

> changing them

> > in theory, so if you like it, I will update them into patch V2 also:)

> 

> Yes, please do that.

Will do it, thanks.
diff mbox

Patch

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 1b41fca..1b9d774 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -505,6 +505,19 @@  static int device_resume_noirq(struct device *dev, pm_message_t state)
 	return error;
 }
 
+static bool is_async(struct device *dev);
+
+static void async_resume_noirq(void *data, async_cookie_t cookie)
+{
+	struct device *dev = (struct device *)data;
+	int error;
+
+	error = device_resume_noirq(dev, pm_transition);
+	if (error)
+		pm_dev_err(dev, pm_transition, " noirq", error);
+	put_device(dev);
+}
+
 /**
  * dpm_resume_noirq - Execute "noirq resume" callbacks for all devices.
  * @state: PM transition of the system being carried out.
@@ -514,29 +527,40 @@  static int device_resume_noirq(struct device *dev, pm_message_t state)
  */
 static void dpm_resume_noirq(pm_message_t state)
 {
+	struct device *dev;
 	ktime_t starttime = ktime_get();
+	pm_transition = state;
+
+	list_for_each_entry(dev, &dpm_noirq_list, power.entry) {
+		if (is_async(dev)) {
+			get_device(dev);
+			async_schedule(async_resume_noirq, dev);
+		}
+	}
 
 	mutex_lock(&dpm_list_mtx);
 	while (!list_empty(&dpm_noirq_list)) {
-		struct device *dev = to_device(dpm_noirq_list.next);
-		int error;
+		dev = to_device(dpm_noirq_list.next);
 
 		get_device(dev);
 		list_move_tail(&dev->power.entry, &dpm_late_early_list);
 		mutex_unlock(&dpm_list_mtx);
 
-		error = device_resume_noirq(dev, state);
-		if (error) {
-			suspend_stats.failed_resume_noirq++;
-			dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
-			dpm_save_failed_dev(dev_name(dev));
-			pm_dev_err(dev, state, " noirq", error);
+		if (!is_async(dev)) {
+			int error;
+			error = device_resume_noirq(dev, state);
+			if (error) {
+				suspend_stats.failed_resume_noirq++;
+				dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
+				dpm_save_failed_dev(dev_name(dev));
+				pm_dev_err(dev, state, " noirq", error);
+			}
 		}
-
 		mutex_lock(&dpm_list_mtx);
 		put_device(dev);
 	}
 	mutex_unlock(&dpm_list_mtx);
+	async_synchronize_full();
 	dpm_show_time(starttime, state, "noirq");
 	resume_device_irqs();
 	cpuidle_resume();