@@ -38,6 +38,7 @@
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_transport.h>
+#include <scsi/scsi_cmnd.h>
#include "scsi_priv.h"
#include "scsi_logging.h"
@@ -554,13 +555,35 @@ struct Scsi_Host *scsi_host_get(struct Scsi_Host *shost)
}
EXPORT_SYMBOL(scsi_host_get);
+struct scsi_host_mq_in_flight {
+ int cnt;
+};
+
+static bool scsi_host_check_in_flight(struct request *rq, void *data,
+ bool reserved)
+{
+ struct scsi_host_mq_in_flight *in_flight = data;
+ struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+ if (test_bit(SCMD_STATE_INFLIGHT, &cmd->state))
+ in_flight->cnt++;
+
+ return true;
+}
+
/**
* scsi_host_busy - Return the host busy counter
* @shost: Pointer to Scsi_Host to inc.
**/
int scsi_host_busy(struct Scsi_Host *shost)
{
- return atomic_read(&shost->host_busy);
+ struct scsi_host_mq_in_flight in_flight = {
+ .cnt = 0,
+ };
+
+ blk_mq_tagset_busy_iter(&shost->tag_set, scsi_host_check_in_flight,
+ &in_flight);
+ return in_flight.cnt;
}
EXPORT_SYMBOL(scsi_host_busy);
@@ -186,7 +186,7 @@ void scsi_finish_command(struct scsi_cmnd *cmd)
struct scsi_driver *drv;
unsigned int good_bytes;
- scsi_device_unbusy(sdev);
+ scsi_device_unbusy(sdev, cmd);
/*
* Clear the flags that say that the device/target/host is no longer
@@ -189,7 +189,7 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy)
* active on the host/device.
*/
if (unbusy)
- scsi_device_unbusy(device);
+ scsi_device_unbusy(device, cmd);
/*
* Requeue this command. It will go before all other commands
@@ -329,12 +329,12 @@ static void scsi_init_cmd_errh(struct scsi_cmnd *cmd)
* host_failed counter or that it notices the shost state change made by
* scsi_eh_scmd_add().
*/
-static void scsi_dec_host_busy(struct Scsi_Host *shost)
+static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
{
unsigned long flags;
rcu_read_lock();
- atomic_dec(&shost->host_busy);
+ clear_bit(SCMD_STATE_INFLIGHT, &cmd->state);
if (unlikely(scsi_host_in_recovery(shost))) {
spin_lock_irqsave(shost->host_lock, flags);
if (shost->host_failed || shost->host_eh_scheduled)
@@ -344,12 +344,12 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost)
rcu_read_unlock();
}
-void scsi_device_unbusy(struct scsi_device *sdev)
+void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd)
{
struct Scsi_Host *shost = sdev->host;
struct scsi_target *starget = scsi_target(sdev);
- scsi_dec_host_busy(shost);
+ scsi_dec_host_busy(shost, cmd);
if (starget->can_queue > 0)
atomic_dec(&starget->target_busy);
@@ -430,9 +430,6 @@ static inline bool scsi_target_is_busy(struct scsi_target *starget)
static inline bool scsi_host_is_busy(struct Scsi_Host *shost)
{
- if (shost->can_queue > 0 &&
- atomic_read(&shost->host_busy) >= shost->can_queue)
- return true;
if (atomic_read(&shost->host_blocked) > 0)
return true;
if (shost->host_self_blocked)
@@ -1139,6 +1136,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
unsigned int flags = cmd->flags & SCMD_PRESERVED_FLAGS;
unsigned long jiffies_at_alloc;
int retries;
+ bool in_flight;
if (!blk_rq_is_scsi(rq) && !(flags & SCMD_INITIALIZED)) {
flags |= SCMD_INITIALIZED;
@@ -1147,6 +1145,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
jiffies_at_alloc = cmd->jiffies_at_alloc;
retries = cmd->retries;
+ in_flight = test_bit(SCMD_STATE_INFLIGHT, &cmd->state);
/* zero out the cmd, except for the embedded scsi_request */
memset((char *)cmd + sizeof(cmd->req), 0,
sizeof(*cmd) - sizeof(cmd->req) + dev->host->hostt->cmd_size);
@@ -1158,6 +1157,8 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
cmd->jiffies_at_alloc = jiffies_at_alloc;
cmd->retries = retries;
+ if (in_flight)
+ set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
scsi_add_cmd_to_list(cmd);
}
@@ -1367,16 +1368,14 @@ static inline int scsi_target_queue_ready(struct Scsi_Host *shost,
*/
static inline int scsi_host_queue_ready(struct request_queue *q,
struct Scsi_Host *shost,
- struct scsi_device *sdev)
+ struct scsi_device *sdev,
+ struct scsi_cmnd *cmd)
{
- unsigned int busy;
-
if (scsi_host_in_recovery(shost))
return 0;
- busy = atomic_inc_return(&shost->host_busy) - 1;
if (atomic_read(&shost->host_blocked) > 0) {
- if (busy)
+ if (scsi_host_busy(shost) > 0)
goto starved;
/*
@@ -1390,8 +1389,6 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
"unblocking host at zero depth\n"));
}
- if (shost->can_queue > 0 && busy >= shost->can_queue)
- goto starved;
if (shost->host_self_blocked)
goto starved;
@@ -1403,6 +1400,8 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
spin_unlock_irq(shost->host_lock);
}
+ set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
+
return 1;
starved:
@@ -1411,7 +1410,7 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
list_add_tail(&sdev->starved_entry, &shost->starved_list);
spin_unlock_irq(shost->host_lock);
out_dec:
- scsi_dec_host_busy(shost);
+ scsi_dec_host_busy(shost, cmd);
return 0;
}
@@ -1665,7 +1664,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
ret = BLK_STS_RESOURCE;
if (!scsi_target_queue_ready(shost, sdev))
goto out_put_budget;
- if (!scsi_host_queue_ready(q, shost, sdev))
+ if (!scsi_host_queue_ready(q, shost, sdev, cmd))
goto out_dec_target_busy;
if (!(req->rq_flags & RQF_DONTPREP)) {
@@ -1697,7 +1696,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_OK;
out_dec_host_busy:
- scsi_dec_host_busy(shost);
+ scsi_dec_host_busy(shost, cmd);
out_dec_target_busy:
if (scsi_target(sdev)->can_queue > 0)
atomic_dec(&scsi_target(sdev)->target_busy);
@@ -87,7 +87,7 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd);
extern void scsi_add_cmd_to_list(struct scsi_cmnd *cmd);
extern void scsi_del_cmd_from_list(struct scsi_cmnd *cmd);
extern int scsi_maybe_unblock_host(struct scsi_device *sdev);
-extern void scsi_device_unbusy(struct scsi_device *sdev);
+extern void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd);
extern void scsi_queue_insert(struct scsi_cmnd *cmd, int reason);
extern void scsi_io_completion(struct scsi_cmnd *, unsigned int);
extern void scsi_run_host_queues(struct Scsi_Host *shost);
@@ -63,6 +63,7 @@ struct scsi_pointer {
/* for scmd->state */
#define SCMD_STATE_COMPLETE 0
+#define SCMD_STATE_INFLIGHT 1
struct scsi_cmnd {
struct scsi_request req;
@@ -551,7 +551,6 @@ struct Scsi_Host {
/* Area to keep a shared tag map */
struct blk_mq_tag_set tag_set;
- atomic_t host_busy; /* commands actually active on low-level */
atomic_t host_blocked;
unsigned int host_failed; /* commands that failed.
It isn't necessary to check the host depth in scsi_queue_rq() any more since it has been respected by blk-mq before calling scsi_queue_rq() via getting driver tag. Lots of LUNs may attach to same host and per-host IOPS may reach millions, so we should avoid expensive atomic operations on the host-wide counter in the IO path. This patch implements scsi_host_busy() via blk_mq_tagset_busy_iter() with one scsi command state for reading the count of busy IOs for scsi_mq. It is observed that IOPS is increased by 15% in IO test on scsi_debug (32 LUNs, 32 submit queues, 1024 can_queue, libaio/dio) in a dual-socket system. V2: - introduce SCMD_STATE_INFLIGHT for getting accurate host busy via blk_mq_tagset_busy_iter() - verified that original Jens's report[1] is fixed - verified that SCSI timeout/abort works fine [1] https://www.spinics.net/lists/linux-scsi/msg122867.html [2] V1 & its revert: d772a65d8a6c Revert "scsi: core: avoid host-wide host_busy counter for scsi_mq" 23aa8e69f2c6 Revert "scsi: core: fix scsi_host_queue_ready" 265d59aacbce scsi: core: fix scsi_host_queue_ready 328728630d9f scsi: core: avoid host-wide host_busy counter for scsi_mq Cc: Jens Axboe <axboe@kernel.dk> Cc: Ewan D. Milne <emilne@redhat.com> Cc: Omar Sandoval <osandov@fb.com>, Cc: "Martin K. Petersen" <martin.petersen@oracle.com>, Cc: James Bottomley <james.bottomley@hansenpartnership.com>, Cc: Christoph Hellwig <hch@lst.de>, Cc: Kashyap Desai <kashyap.desai@broadcom.com> Cc: Hannes Reinecke <hare@suse.de> Cc: Laurence Oberman <loberman@redhat.com> Cc: Bart Van Assche <bart.vanassche@wdc.com> Signed-off-by: Ming Lei <ming.lei@redhat.com> --- drivers/scsi/hosts.c | 25 ++++++++++++++++++++++++- drivers/scsi/scsi.c | 2 +- drivers/scsi/scsi_lib.c | 35 +++++++++++++++++------------------ drivers/scsi/scsi_priv.h | 2 +- include/scsi/scsi_cmnd.h | 1 + include/scsi/scsi_host.h | 1 - 6 files changed, 44 insertions(+), 22 deletions(-)