
[v7,3/4] block: implement runtime pm strategy

Message ID 1358326941-3725-4-git-send-email-aaron.lu@intel.com (mailing list archive)
State Not Applicable, archived

Commit Message

Aaron Lu Jan. 16, 2013, 9:02 a.m. UTC
From: Lin Ming <ming.m.lin@intel.com>

When a request is added:
    If device is suspended or is suspending and the request is not a
    PM request, resume the device.

When the last request finishes:
    Call pm_runtime_mark_last_busy().

When picking a request:
    If the device is resuming/suspending, only PM requests are allowed
    to go.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
---
 block/blk-core.c | 39 +++++++++++++++++++++++++++++++++++++++
 block/elevator.c | 26 ++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

Comments

Alan Stern Jan. 16, 2013, 3:30 p.m. UTC | #1
On Wed, 16 Jan 2013, Aaron Lu wrote:

> From: Lin Ming <ming.m.lin@intel.com>
> 
> When a request is added:
>     If device is suspended or is suspending and the request is not a
>     PM request, resume the device.
> 
> When the last request finishes:
>     Call pm_runtime_mark_last_busy().
> 
> When picking a request:
>     If the device is resuming/suspending, only PM requests are allowed
>     to go.
> 
> Signed-off-by: Lin Ming <ming.m.lin@intel.com>
> Signed-off-by: Aaron Lu <aaron.lu@intel.com>

Just a couple of minor problems remaining...

> --- a/block/blk-core.c
> +++ b/block/blk-core.c

> @@ -2051,6 +2063,28 @@ static void blk_account_io_done(struct request *req)
>  	}
>  }
>  
> +#ifdef CONFIG_PM_RUNTIME
> +/*
> + * Don't process normal requests when queue is suspended
> + * or in the process of suspending/resuming
> + */
> +static struct request *blk_pm_peek_request(struct request_queue *q,
> +					   struct request *rq)
> +{
> +	if (q->rpm_status == RPM_SUSPENDED ||
> +	    (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM)))
> +		return NULL;
> +	else
> +		return rq;
> +}

You don't check q->dev here, so the result is indefinite for devices
that don't use runtime PM.  (Actually it will work out because
RPM_ACTIVE is defined as 0, but that's a pretty fragile approach.)

Either do check q->dev here, or else explicitly initialize
q->rpm_status when the queue is created.
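
That is, something along these lines (just a sketch, not the final patch;
the alternative being a one-line q->rpm_status = RPM_ACTIVE initialization
when the queue is allocated):

static struct request *blk_pm_peek_request(struct request_queue *q,
					   struct request *rq)
{
	/*
	 * q->dev is only set by blk_pm_runtime_init(); queues that never
	 * called it are not managed by block layer runtime PM at all.
	 */
	if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
	    (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM))))
		return NULL;
	else
		return rq;
}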


> --- a/block/elevator.c
> +++ b/block/elevator.c
> @@ -34,6 +34,7 @@
>  #include <linux/blktrace_api.h>
>  #include <linux/hash.h>
>  #include <linux/uaccess.h>
> +#include <linux/pm_runtime.h>
>  
>  #include <trace/events/block.h>
>  
> @@ -515,6 +516,27 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
>  		e->type->ops.elevator_bio_merged_fn(q, rq, bio);
>  }
>  
> +#ifdef CONFIG_PM_RUNTIME
> +static void blk_pm_requeue_request(struct request *rq)
> +{
> +	if (!(rq->cmd_flags & REQ_PM))
> +		rq->q->nr_pending--;
> +}

You don't check q->dev here.  That's okay, but it means that
q->nr_pending will be meaningless or wrong if any I/O takes place
before blk_pm_runtime_init is called.

Therefore the kerneldoc for blk_pm_runtime_init should mention that it
must not be called after any requests have been submitted.  Also
mention that blk_pm_runtime_init enables runtime PM for q->dev, so the 
caller shouldn't do it.

Alan Stern

Aaron Lu Jan. 17, 2013, 5:13 a.m. UTC | #2
On Wed, Jan 16, 2013 at 10:30:45AM -0500, Alan Stern wrote:
> On Wed, 16 Jan 2013, Aaron Lu wrote:
> 
> > From: Lin Ming <ming.m.lin@intel.com>
> > 
> > When a request is added:
> >     If device is suspended or is suspending and the request is not a
> >     PM request, resume the device.
> > 
> > When the last request finishes:
> >     Call pm_runtime_mark_last_busy().
> > 
> > When picking a request:
> >     If the device is resuming/suspending, only PM requests are allowed
> >     to go.
> > 
> > Signed-off-by: Lin Ming <ming.m.lin@intel.com>
> > Signed-off-by: Aaron Lu <aaron.lu@intel.com>
> 
> Just a couple of minor problems remaining...
> 
> > --- a/block/blk-core.c
> > +++ b/block/blk-core.c
> 
> > @@ -2051,6 +2063,28 @@ static void blk_account_io_done(struct request *req)
> >  	}
> >  }
> >  
> > +#ifdef CONFIG_PM_RUNTIME
> > +/*
> > + * Don't process normal requests when queue is suspended
> > + * or in the process of suspending/resuming
> > + */
> > +static struct request *blk_pm_peek_request(struct request_queue *q,
> > +					   struct request *rq)
> > +{
> > +	if (q->rpm_status == RPM_SUSPENDED ||
> > +	    (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM)))
> > +		return NULL;
> > +	else
> > +		return rq;
> > +}
> 
> You don't check q->dev here, so the result is indefinite for devices
> that don't use runtime PM.  (Actually it will work out because
> RPM_ACTIVE is defined as 0, but that's a pretty fragile approach.)
> 
> Either do check q->dev here, or else explicitly initialize
> q->rpm_status when the queue is created.

I think I'll check q->dev, which is more clear.

> 
> 
> > --- a/block/elevator.c
> > +++ b/block/elevator.c
> > @@ -34,6 +34,7 @@
> >  #include <linux/blktrace_api.h>
> >  #include <linux/hash.h>
> >  #include <linux/uaccess.h>
> > +#include <linux/pm_runtime.h>
> >  
> >  #include <trace/events/block.h>
> >  
> > @@ -515,6 +516,27 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
> >  		e->type->ops.elevator_bio_merged_fn(q, rq, bio);
> >  }
> >  
> > +#ifdef CONFIG_PM_RUNTIME
> > +static void blk_pm_requeue_request(struct request *rq)
> > +{
> > +	if (!(rq->cmd_flags & REQ_PM))
> > +		rq->q->nr_pending--;
> > +}
> 
> You don't check q->dev here.  That's okay, but it means that
> q->nr_pending will be meaningless or wrong if any I/O takes place
> before blk_pm_runtime_init is called.

Right, so I had better check q->dev here too.
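
Something like this (sketch):

static void blk_pm_requeue_request(struct request *rq)
{
	/* only account requests on queues that use block layer runtime PM */
	if (rq->q->dev && !(rq->cmd_flags & REQ_PM))
		rq->q->nr_pending--;
}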

> 
> Therefore the kerneldoc for blk_pm_runtime_init should mention that it
> must not be called after any requests have been submitted.  Also

So with the q->dev check added above, I believe this is not needed.

> mention that blk_pm_runtime_init enables runtime PM for q->dev, so the 
> caller shouldn't do it.

OK, thanks for the suggestion.

-Aaron

Aaron Lu Jan. 17, 2013, 6:31 a.m. UTC | #3
On Wed, Jan 16, 2013 at 10:30:45AM -0500, Alan Stern wrote:
> On Wed, 16 Jan 2013, Aaron Lu wrote:
> 
> > From: Lin Ming <ming.m.lin@intel.com>
> > 
> > When a request is added:
> >     If device is suspended or is suspending and the request is not a
> >     PM request, resume the device.
> > 
> > When the last request finishes:
> >     Call pm_runtime_mark_last_busy().
> > 
> > When picking a request:
> >     If the device is resuming/suspending, only PM requests are allowed
> >     to go.
> > 
> > Signed-off-by: Lin Ming <ming.m.lin@intel.com>
> > Signed-off-by: Aaron Lu <aaron.lu@intel.com>
> 
> Just a couple of minor problems remaining...
> 
> > --- a/block/blk-core.c
> > +++ b/block/blk-core.c
> 
> > @@ -2051,6 +2063,28 @@ static void blk_account_io_done(struct request *req)
> >  	}
> >  }
> >  
> > +#ifdef CONFIG_PM_RUNTIME
> > +/*
> > + * Don't process normal requests when queue is suspended
> > + * or in the process of suspending/resuming
> > + */
> > +static struct request *blk_pm_peek_request(struct request_queue *q,
> > +					   struct request *rq)
> > +{
> > +	if (q->rpm_status == RPM_SUSPENDED ||
> > +	    (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM)))
> > +		return NULL;
> > +	else
> > +		return rq;
> > +}
> 
> You don't check q->dev here, so the result is indefinite for devices
> that don't use runtime PM.  (Actually it will work out because
> RPM_ACTIVE is defined as 0, but that's a pretty fragile approach.)
> 
> Either do check q->dev here, or else explicitly initialize
> q->rpm_status when the queue is created.
> 
> 
> > --- a/block/elevator.c
> > +++ b/block/elevator.c
> > @@ -34,6 +34,7 @@
> >  #include <linux/blktrace_api.h>
> >  #include <linux/hash.h>
> >  #include <linux/uaccess.h>
> > +#include <linux/pm_runtime.h>
> >  
> >  #include <trace/events/block.h>
> >  
> > @@ -515,6 +516,27 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
> >  		e->type->ops.elevator_bio_merged_fn(q, rq, bio);
> >  }
> >  
> > +#ifdef CONFIG_PM_RUNTIME
> > +static void blk_pm_requeue_request(struct request *rq)
> > +{
> > +	if (!(rq->cmd_flags & REQ_PM))
> > +		rq->q->nr_pending--;
> > +}
> 
> You don't check q->dev here.  That's okay, but it means that
> q->nr_pending will be meaningless or wrong if any I/O takes place
> before blk_pm_runtime_init is called.
> 
> Therefore the kerneldoc for blk_pm_runtime_init should mention that it
> must not be called after any requests have been submitted.  Also

------------
> mention that blk_pm_runtime_init enables runtime PM for q->dev, so the 
> caller shouldn't do it.

I may have misunderstood this in the last email.

We don't enable runtime PM for the device in blk_pm_runtime_init, just
do some autosuspend-related setup.

Thanks,
Aaron

Alan Stern Jan. 17, 2013, 3:11 p.m. UTC | #4
On Thu, 17 Jan 2013, Aaron Lu wrote:

> > > @@ -515,6 +516,27 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
> > >  		e->type->ops.elevator_bio_merged_fn(q, rq, bio);
> > >  }
> > >  
> > > +#ifdef CONFIG_PM_RUNTIME
> > > +static void blk_pm_requeue_request(struct request *rq)
> > > +{
> > > +	if (!(rq->cmd_flags & REQ_PM))
> > > +		rq->q->nr_pending--;
> > > +}
> > 
> > You don't check q->dev here.  That's okay, but it means that
> > q->nr_pending will be meaningless or wrong if any I/O takes place
> > before blk_pm_runtime_init is called.
> 
> Right, so I had better check q->dev here too.
> 
> > 
> > Therefore the kerneldoc for blk_pm_runtime_init should mention that it
> > must not be called after any requests have been submitted.  Also
> 
> So with the q->dev check added above, I believe this is not needed.

No, it is still needed.  With the q->dev check added, q->nr_pending 
will always be 0 before blk_pm_runtime_init is called.  But if I/O is 
taking place then the number of pending requests really isn't 0.

Either you have to make sure the q->nr_pending is always correct, even 
when runtime PM isn't being used, or else the caller has to make sure 
that no I/O takes place before blk_pm_runtime_init is called.
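
For reference, blk_pm_runtime_init from earlier in this series does little
more than record the device and set up autosuspend, roughly:

void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
{
	q->dev = dev;
	q->rpm_status = RPM_ACTIVE;
	pm_runtime_set_autosuspend_delay(q->dev, -1);
	pm_runtime_use_autosuspend(q->dev);
}

So with the q->dev test in the accounting helpers, requests already in
flight when it runs are never counted, and nr_pending ends up wrong once
they complete or get requeued.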

> ------------
> > mention that blk_pm_runtime_init enables runtime PM for q->dev, so the
> > caller shouldn't do it.
> 
> I may have misunderstood this in the last email.
> 
> We don't enable runtime PM for the device in blk_pm_runtime_init, just
> do some autosuspend-related setup.

Sorry, yes, you are right.  In fact, the caller has to make sure that 
runtime PM is enabled before calling blk_pm_runtime_init; otherwise the 
autosuspend timer might not work right.
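
In other words, the ordering on the caller's side should be roughly this
(a generic sketch, not scsi-specific, with a made-up delay value):

	/* enable runtime PM for the device first ... */
	pm_runtime_set_active(dev);
	pm_runtime_enable(dev);
	/* ... then hand the queue over to block layer runtime PM */
	blk_pm_runtime_init(q, dev);
	pm_runtime_set_autosuspend_delay(dev, 5000);	/* hypothetical 5 s delay */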

Alan Stern

Aaron Lu Jan. 18, 2013, 8:27 a.m. UTC | #5
On Thu, Jan 17, 2013 at 10:11:31AM -0500, Alan Stern wrote:
> On Thu, 17 Jan 2013, Aaron Lu wrote:
> 
> > > > @@ -515,6 +516,27 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
> > > >  		e->type->ops.elevator_bio_merged_fn(q, rq, bio);
> > > >  }
> > > >  
> > > > +#ifdef CONFIG_PM_RUNTIME
> > > > +static void blk_pm_requeue_request(struct request *rq)
> > > > +{
> > > > +	if (!(rq->cmd_flags & REQ_PM))
> > > > +		rq->q->nr_pending--;
> > > > +}
> > > 
> > > You don't check q->dev here.  That's okay, but it means that
> > > q->nr_pending will be meaningless or wrong if any I/O takes place
> > > before blk_pm_runtime_init is called.
> > 
> > Right, so I had better check q->dev here too.
> > 
> > > 
> > > Therefore the kerneldoc for blk_pm_runtime_init should mention that it
> > > must not be called after any requests have been submitted.  Also
> > 
> > So with the q->dev check added above, I believe this is not needed.
> 
> No, it is still needed.  With the q->dev check added, q->nr_pending 
> will always be 0 before blk_pm_runtime_init is called.  But if I/O is 
> taking place then the number of pending requests really isn't 0.

Right, we can't allow I/O to happen when blk_pm_runtime_init is
executing, or we will have an incorrect nr_pending.

> 
> Either you have to make sure the q->nr_pending is always correct, even 
> when runtime PM isn't being used, or else the caller has to make sure 
> that no I/O takes place before blk_pm_runtime_init is called.

I think we can say:
blk_pm_runtime_init can't be called after any requests have been
submitted but not finished.
Sounds more accurate?
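
i.e., the kerneldoc could read something like this (a sketch of the
wording, not the final text):

/**
 * blk_pm_runtime_init - Block layer runtime PM initialization routine
 * @q: the queue of the device
 * @dev: the device the queue belongs to
 *
 * Initialize the runtime PM fields of @q and set up autosuspend for
 * @dev.  Runtime PM for @dev must already be enabled by the caller;
 * this routine only does autosuspend-related setup.  It must not be
 * called after any requests have been submitted but not finished.
 */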

Thanks,
Aaron

> 
> > ------------
> > > mention that blk_pm_runtime_init enables runtime PM for q->dev, so the
> > > caller shouldn't do it.
> > 
> > I may have misunderstood this in the last email.
> > 
> > We don't enable runtime PM for the device in blk_pm_runtime_init, just
> > do some autosuspend-related setup.
> 
> Sorry, yes, you are right.  In fact, the caller has to make sure that 
> runtime PM is enabled before calling blk_pm_runtime_init; otherwise the 
> autosuspend timer might not work right.
> 
> Alan Stern
> 
Alan Stern Jan. 18, 2013, 3:26 p.m. UTC | #6
On Fri, 18 Jan 2013, Aaron Lu wrote:

> > Either you have to make sure the q->nr_pending is always correct, even 
> > when runtime PM isn't being used, or else the caller has to make sure 
> > that no I/O takes place before blk_pm_runtime_init is called.
> 
> I think we can say:
> blk_pm_runtime_init can't be called after any requests have been
> submitted but not finished.
> Sounds more accurate?

Okay.  I think you can add (in parentheses) that in most cases drivers
should call the routine before any I/O has taken place.

Alan Stern

Aaron Lu Jan. 19, 2013, 6:24 a.m. UTC | #7
On 01/18/2013 11:26 PM, Alan Stern wrote:
> On Fri, 18 Jan 2013, Aaron Lu wrote:
>
>>> Either you have to make sure the q->nr_pending is always correct, even
>>> when runtime PM isn't being used, or else the caller has to make sure
>>> that no I/O takes place before blk_pm_runtime_init is called.
>>
>> I think we can say:
>> blk_pm_runtime_init can't be called after any requests have been
>> submitted but not finished.
>> Sounds more accurate?
>
> Okay.  I think you can add (in parentheses) that in most cases drivers
> should call the routine before any I/O has taken place.

The reason I put it that way is, in patch 4, blk_pm_runtime_init is
called after a request is executed (scsi_probe_lun). I do not want people
to get confused by the comments for blk_pm_runtime_init and the example
code in patch 4, where we didn't follow it :-)

Considering ODD's use case, I was thinking of moving the
blk_pm_runtime_init call to sd.c, as sr will not use request based auto
suspend. Probably right before we decrease usage count for the device in
sd_probe_async. What do you think?
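
Roughly (a sketch of the idea; the exact spot and names would need
checking against sd.c):

	/* near the end of sd_probe_async() */
	blk_pm_runtime_init(sdp->request_queue, &sdp->sdev_gendev);
	scsi_autopm_put_device(sdp);	/* drop the usage count taken in sd_probe() */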

Thanks,
Aaron
Alan Stern Jan. 19, 2013, 6:11 p.m. UTC | #8
On Sat, 19 Jan 2013, Aaron Lu wrote:

> > Okay.  I think you can add (in parentheses) that in most cases drivers
> > should call the routine before any I/O has taken place.
> 
> The reason I put it that way is, in patch 4, blk_pm_runtime_init is
> called after a request is executed (scsi_probe_lun). I do not want people
> to get confused by the comments for blk_pm_runtime_init and the example
> code in patch 4, where we didn't follow it :-)

Right.

> Considering ODD's use case, I was thinking of moving the
> blk_pm_runtime_init call to sd.c, as sr will not use request based auto
> suspend. Probably right before we decrease usage count for the device in
> sd_probe_async. What do you think?

That makes sense.  But then you should review the changes in scsi_pm.c 
to make sure they will work okay for devices that aren't using 
block-layer runtime PM.

Alan Stern

Aaron Lu Jan. 28, 2013, 9:21 a.m. UTC | #9
On Sat, Jan 19, 2013 at 01:11:45PM -0500, Alan Stern wrote:
> On Sat, 19 Jan 2013, Aaron Lu wrote:
> > Considering ODD's use case, I was thinking of moving the
> > blk_pm_runtime_init call to sd.c, as sr will not use request based auto
> > suspend. Probably right before we decrease usage count for the device in
> > sd_probe_async. What do you think?
> 
> That makes sense.  But then you should review the changes in scsi_pm.c 
> to make sure they will work okay for devices that aren't using 
> block-layer runtime PM.

Now that we have two different runtime PM schemes for scsi devices, I
think there are two ways to make them work:

1 Do it all in scsi_pm.c. In bus' runtime PM callback, check if this
  device is using block layer runtime PM API, and act accordingly;
2 Do it in individual drivers' runtime PM callbacks. Bus' runtime PM
  callbacks just call pm_generic_runtime_xxx, and each driver's runtime
  PM callback will need to do what is appropriate for them.

Personally I want to go with option 1, but I would like to hear your
opinion :-)

And for option 1, the code would be like this:

static int scsi_blk_runtime_suspend(struct device *dev)
{
	struct scsi_device *sdev = to_scsi_device(dev);
	int err;

	err = blk_pre_runtime_suspend(sdev->request_queue);
	if (err)
		return err;
	err = pm_generic_runtime_suspend(dev);
	blk_post_runtime_suspend(sdev->request_queue, err);

	return err;
}

static int scsi_dev_runtime_suspend(struct device *dev)
{
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
	int err;

	err = scsi_dev_type_suspend(dev, pm ? pm->runtime_suspend : NULL);
	if (err == -EAGAIN)
		pm_schedule_suspend(dev, jiffies_to_msecs(
					round_jiffies_up_relative(HZ/10)));

	return err;
}

static int scsi_runtime_suspend(struct device *dev)
{
	int err = 0;

	dev_dbg(dev, "scsi_runtime_suspend\n");
	if (scsi_is_sdev_device(dev)) {
		struct scsi_device *sdev = to_scsi_device(dev);
		if (sdev->request_queue->dev)
			err = scsi_blk_runtime_suspend(dev);
		else
			err = scsi_dev_runtime_suspend(dev);
	}

	/* Insert hooks here for targets, hosts, and transport classes */

	return err;
}

static int scsi_blk_runtime_resume(struct device *dev)
{
	struct scsi_device *sdev = to_scsi_device(dev);
	int err;

	blk_pre_runtime_resume(sdev->request_queue);
	err = pm_generic_runtime_resume(dev);
	blk_post_runtime_resume(sdev->request_queue, err);

	return err;
}

static int scsi_dev_runtime_resume(struct device *dev)
{
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
	return scsi_dev_type_resume(dev, pm ? pm->runtime_resume : NULL);
}

static int scsi_runtime_resume(struct device *dev)
{
	int err = 0;

	dev_dbg(dev, "scsi_runtime_resume\n");
	if (scsi_is_sdev_device(dev)) {
		struct scsi_device *sdev = to_scsi_device(dev);
		if (sdev->request_queue->dev)
			err = scsi_blk_runtime_resume(dev);
		else
			err = scsi_dev_runtime_resume(dev);
	}

	/* Insert hooks here for targets, hosts, and transport classes */

	return err;
}

static int scsi_runtime_idle(struct device *dev)
{
	int err;

	dev_dbg(dev, "scsi_runtime_idle\n");

	/* Insert hooks here for targets, hosts, and transport classes */

	if (scsi_is_sdev_device(dev)) {
		struct scsi_device *sdev = to_scsi_device(dev);
		if (sdev->request_queue->dev) {
			pm_runtime_mark_last_busy(dev);
			err = pm_runtime_autosuspend(dev);
		} else {
			err = pm_schedule_suspend(dev, 100);
		}
	} else {
		err = pm_runtime_suspend(dev);
	}
	return err;
}

Please feel free to suggest, thanks.

-Aaron

Alan Stern Jan. 28, 2013, 3:11 p.m. UTC | #10
On Mon, 28 Jan 2013, Aaron Lu wrote:

> On Sat, Jan 19, 2013 at 01:11:45PM -0500, Alan Stern wrote:
> > On Sat, 19 Jan 2013, Aaron Lu wrote:
> > > Considering ODD's use case, I was thinking of moving the
> > > blk_pm_runtime_init call to sd.c, as sr will not use request based auto
> > > suspend. Probably right before we decrease usage count for the device in
> > > sd_probe_async. What do you think?
> > 
> > That makes sense.  But then you should review the changes in scsi_pm.c 
> > to make sure they will work okay for devices that aren't using 
> > block-layer runtime PM.
> 
> Now that we have two different runtime PM schemes for scsi devices, I
> think there are two ways to make them work:
> 
> 1 Do it all in scsi_pm.c. In bus' runtime PM callback, check if this
>   device is using block layer runtime PM API, and act accordingly;
> 2 Do it in individual drivers' runtime PM callbacks. Bus' runtime PM
>   callbacks just call pm_generic_runtime_xxx, and each driver's runtime
>   PM callback will need to do what is appropriate for them.
> 
> Personally I want to go with option 1, but I would like to hear your
> opinion :-)

I don't think it matters very much.  Option 1 is okay with me.  James 
might have a stronger feeling one way or the other.

Alan Stern


Patch

diff --git a/block/blk-core.c b/block/blk-core.c
index e441daf..d1bf5dc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1264,6 +1264,16 @@  void part_round_stats(int cpu, struct hd_struct *part)
 }
 EXPORT_SYMBOL_GPL(part_round_stats);
 
+#ifdef CONFIG_PM_RUNTIME
+static void blk_pm_put_request(struct request *rq)
+{
+	if (rq->q->dev && !(rq->cmd_flags & REQ_PM) && !--rq->q->nr_pending)
+		pm_runtime_mark_last_busy(rq->q->dev);
+}
+#else
+static inline void blk_pm_put_request(struct request *rq) {}
+#endif
+
 /*
  * queue lock must be held
  */
@@ -1274,6 +1284,8 @@  void __blk_put_request(struct request_queue *q, struct request *req)
 	if (unlikely(--req->ref_count))
 		return;
 
+	blk_pm_put_request(req);
+
 	elv_completed_request(q, req);
 
 	/* this is a bio leak */
@@ -2051,6 +2063,28 @@  static void blk_account_io_done(struct request *req)
 	}
 }
 
+#ifdef CONFIG_PM_RUNTIME
+/*
+ * Don't process normal requests when queue is suspended
+ * or in the process of suspending/resuming
+ */
+static struct request *blk_pm_peek_request(struct request_queue *q,
+					   struct request *rq)
+{
+	if (q->rpm_status == RPM_SUSPENDED ||
+	    (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM)))
+		return NULL;
+	else
+		return rq;
+}
+#else
+static inline struct request *blk_pm_peek_request(struct request_queue *q,
+					   struct request *rq)
+{
+	return rq;
+}
+#endif
+
 /**
  * blk_peek_request - peek at the top of a request queue
  * @q: request queue to peek at
@@ -2073,6 +2107,11 @@  struct request *blk_peek_request(struct request_queue *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
+
+		rq = blk_pm_peek_request(q, rq);
+		if (!rq)
+			break;
+
 		if (!(rq->cmd_flags & REQ_STARTED)) {
 			/*
 			 * This is the first time the device driver
diff --git a/block/elevator.c b/block/elevator.c
index 11683bb..0e954e3 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -34,6 +34,7 @@ 
 #include <linux/blktrace_api.h>
 #include <linux/hash.h>
 #include <linux/uaccess.h>
+#include <linux/pm_runtime.h>
 
 #include <trace/events/block.h>
 
@@ -515,6 +516,27 @@  void elv_bio_merged(struct request_queue *q, struct request *rq,
 		e->type->ops.elevator_bio_merged_fn(q, rq, bio);
 }
 
+#ifdef CONFIG_PM_RUNTIME
+static void blk_pm_requeue_request(struct request *rq)
+{
+	if (!(rq->cmd_flags & REQ_PM))
+		rq->q->nr_pending--;
+}
+
+static void blk_pm_add_request(struct request_queue *q, struct request *rq)
+{
+	if (q->dev && !(rq->cmd_flags & REQ_PM) && q->nr_pending++ == 0 &&
+	    (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
+		pm_request_resume(q->dev);
+}
+#else
+static inline void blk_pm_requeue_request(struct request *rq) {}
+static inline void blk_pm_add_request(struct request_queue *q,
+				      struct request *rq)
+{
+}
+#endif
+
 void elv_requeue_request(struct request_queue *q, struct request *rq)
 {
 	/*
@@ -529,6 +551,8 @@  void elv_requeue_request(struct request_queue *q, struct request *rq)
 
 	rq->cmd_flags &= ~REQ_STARTED;
 
+	blk_pm_requeue_request(rq);
+
 	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
 }
 
@@ -551,6 +575,8 @@  void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 {
 	trace_block_rq_insert(q, rq);
 
+	blk_pm_add_request(q, rq);
+
 	rq->q = q;
 
 	if (rq->cmd_flags & REQ_SOFTBARRIER) {