diff mbox

[3/3] scsi: avoid to hold host-wide counter of host_busy for scsi_mq

Message ID 20180420065742.8043-4-ming.lei@redhat.com (mailing list archive)
State Not Applicable
Headers show

Commit Message

Ming Lei April 20, 2018, 6:57 a.m. UTC
It isn't necessary to check the host depth in scsi_queue_rq() any more,
since blk-mq already enforces it by allocating a driver tag before
calling scsi_queue_rq().

Lots of LUNs may be attached to the same host, and per-host IOPS may reach
the millions, so we should avoid these expensive atomic operations on the
host-wide counter in the IO path.

This patch implements scsi_host_busy() via blk_mq_tagset_busy_iter() for
reading the count of busy IOs for scsi_mq.

It is observed that IOPS increased by 15% in an IO test on scsi_debug
(32 LUNs, 32 submit queues, 1024 can_queue, libaio/dio) on a
dual-socket system.

Cc: Omar Sandoval <osandov@fb.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: James Bottomley <james.bottomley@hansenpartnership.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Don Brace <don.brace@microsemi.com>
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Laurence Oberman <loberman@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/scsi/hosts.c    | 24 +++++++++++++++++++++++-
 drivers/scsi/scsi_lib.c | 23 +++++++++++++++++------
 2 files changed, 40 insertions(+), 7 deletions(-)

Comments

Bart Van Assche April 27, 2018, 4:16 p.m. UTC | #1
On Fri, 2018-04-20 at 14:57 +0800, Ming Lei wrote:
> +struct scsi_host_mq_in_flight {

> +	int cnt;

> +};

> +

> +static void scsi_host_check_in_flight(struct request *rq, void *data,

> +		bool reserved)

> +{

> +	struct scsi_host_mq_in_flight *in_flight = data;

> +

> +	if (blk_mq_request_started(rq))

> +		in_flight->cnt++;

> +}

> +

>  /**

>   * scsi_host_busy - Return the host busy counter

>   * @shost:	Pointer to Scsi_Host to inc.

>   **/

>  int scsi_host_busy(struct Scsi_Host *shost)

>  {

> -	return atomic_read(&shost->host_busy);

> +	struct scsi_host_mq_in_flight in_flight = {

> +		.cnt = 0,

> +	};

> +

> +	if (!shost->use_blk_mq)

> +		return atomic_read(&shost->host_busy);

> +

> +	blk_mq_tagset_busy_iter(&shost->tag_set, scsi_host_check_in_flight,

> +			&in_flight);

> +	return in_flight.cnt;

>  }

>  EXPORT_SYMBOL(scsi_host_busy);


This patch introduces a subtle behavior change that has not been explained
in the commit message. If a SCSI request gets requeued, that results in a
decrease of the .host_busy counter by scsi_device_unbusy() before the request
is requeued and an increase of the .host_busy counter when scsi_queue_rq() is
called again. During that time such requests have the state MQ_RQ_COMPLETE and
hence blk_mq_request_started() will return true and scsi_host_check_in_flight()
will include these requests. In other words, this patch introduces a subtle
behavior change that has not been explained in the commit message. Hence I
doubt that this change is correct.

Bart.
Ming Lei April 28, 2018, 8:26 a.m. UTC | #2
On Fri, Apr 27, 2018 at 04:16:48PM +0000, Bart Van Assche wrote:
> On Fri, 2018-04-20 at 14:57 +0800, Ming Lei wrote:
> > +struct scsi_host_mq_in_flight {
> > +	int cnt;
> > +};
> > +
> > +static void scsi_host_check_in_flight(struct request *rq, void *data,
> > +		bool reserved)
> > +{
> > +	struct scsi_host_mq_in_flight *in_flight = data;
> > +
> > +	if (blk_mq_request_started(rq))
> > +		in_flight->cnt++;
> > +}
> > +
> >  /**
> >   * scsi_host_busy - Return the host busy counter
> >   * @shost:	Pointer to Scsi_Host to inc.
> >   **/
> >  int scsi_host_busy(struct Scsi_Host *shost)
> >  {
> > -	return atomic_read(&shost->host_busy);
> > +	struct scsi_host_mq_in_flight in_flight = {
> > +		.cnt = 0,
> > +	};
> > +
> > +	if (!shost->use_blk_mq)
> > +		return atomic_read(&shost->host_busy);
> > +
> > +	blk_mq_tagset_busy_iter(&shost->tag_set, scsi_host_check_in_flight,
> > +			&in_flight);
> > +	return in_flight.cnt;
> >  }
> >  EXPORT_SYMBOL(scsi_host_busy);
> 
> This patch introduces a subtle behavior change that has not been explained
> in the commit message. If a SCSI request gets requeued that results in a
> decrease of the .host_busy counter by scsi_device_unbusy() before the request
> is requeued and an increase of the host_busy counter when scsi_queue_rq() is
> called again. During that time such requests have the state MQ_RQ_COMPLETE and
> hence blk_mq_request_started() will return true and scsi_host_check_in_flight()

No, __blk_mq_requeue_request() will change the rq state to MQ_RQ_IDLE,
so the issue you are worried about does not exist. Please look at
scsi_mq_requeue_cmd(), which calls blk_mq_requeue_request(), which puts
the driver tag and updates the rq's state to IDLE.

> will include these requests. In other words, this patch introduces a subtle
> behavior change that has not been explained in the commit message. Hence I'm
> doubt that this change is correct.

As I explained above, there is no such issue.


Thanks,
Ming
diff mbox

Patch

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 69beb30205f1..ad56e2b10ac8 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -564,13 +564,35 @@  struct Scsi_Host *scsi_host_get(struct Scsi_Host *shost)
 }
 EXPORT_SYMBOL(scsi_host_get);
 
+struct scsi_host_mq_in_flight {
+	int cnt;
+};
+
+static void scsi_host_check_in_flight(struct request *rq, void *data,
+		bool reserved)
+{
+	struct scsi_host_mq_in_flight *in_flight = data;
+
+	if (blk_mq_request_started(rq))
+		in_flight->cnt++;
+}
+
 /**
  * scsi_host_busy - Return the host busy counter
  * @shost:	Pointer to Scsi_Host to inc.
  **/
 int scsi_host_busy(struct Scsi_Host *shost)
 {
-	return atomic_read(&shost->host_busy);
+	struct scsi_host_mq_in_flight in_flight = {
+		.cnt = 0,
+	};
+
+	if (!shost->use_blk_mq)
+		return atomic_read(&shost->host_busy);
+
+	blk_mq_tagset_busy_iter(&shost->tag_set, scsi_host_check_in_flight,
+			&in_flight);
+	return in_flight.cnt;
 }
 EXPORT_SYMBOL(scsi_host_busy);
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0dfec0dedd5e..dc437c642934 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -345,7 +345,8 @@  static void scsi_dec_host_busy(struct Scsi_Host *shost)
 	unsigned long flags;
 
 	rcu_read_lock();
-	atomic_dec(&shost->host_busy);
+	if (!shost->use_blk_mq)
+		atomic_dec(&shost->host_busy);
 	if (unlikely(scsi_host_in_recovery(shost))) {
 		spin_lock_irqsave(shost->host_lock, flags);
 		if (shost->host_failed || shost->host_eh_scheduled)
@@ -444,7 +445,12 @@  static inline bool scsi_target_is_busy(struct scsi_target *starget)
 
 static inline bool scsi_host_is_busy(struct Scsi_Host *shost)
 {
-	if (shost->can_queue > 0 &&
+	/*
+	 * blk-mq can handle host queue busy efficiently via host-wide driver
+	 * tag allocation
+	 */
+
+	if (!shost->use_blk_mq && shost->can_queue > 0 &&
 	    atomic_read(&shost->host_busy) >= shost->can_queue)
 		return true;
 	if (atomic_read(&shost->host_blocked) > 0)
@@ -1539,9 +1545,12 @@  static inline int scsi_host_queue_ready(struct request_queue *q,
 	if (scsi_host_in_recovery(shost))
 		return 0;
 
-	busy = atomic_inc_return(&shost->host_busy) - 1;
+	if (!shost->use_blk_mq)
+		busy = atomic_inc_return(&shost->host_busy) - 1;
+	else
+		busy = 0;
 	if (atomic_read(&shost->host_blocked) > 0) {
-		if (busy)
+		if (busy || scsi_host_busy(shost))
 			goto starved;
 
 		/*
@@ -1555,7 +1564,7 @@  static inline int scsi_host_queue_ready(struct request_queue *q,
 				     "unblocking host at zero depth\n"));
 	}
 
-	if (shost->can_queue > 0 && busy >= shost->can_queue)
+	if (!shost->use_blk_mq && shost->can_queue > 0 && busy >= shost->can_queue)
 		goto starved;
 	if (shost->host_self_blocked)
 		goto starved;
@@ -1641,7 +1650,9 @@  static void scsi_kill_request(struct request *req, struct request_queue *q)
 	 * with the locks as normal issue path does.
 	 */
 	atomic_inc(&sdev->device_busy);
-	atomic_inc(&shost->host_busy);
+
+	if (!shost->use_blk_mq)
+		atomic_inc(&shost->host_busy);
 	if (starget->can_queue > 0)
 		atomic_inc(&starget->target_busy);