@@ -743,6 +743,9 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
init_llist_head(&q->sq.cmd_list);
INIT_WORK(&q->sq.work, target_queued_submit_work);
+
+ init_llist_head(&q->cq.cmd_list);
+ INIT_WORK(&q->cq.work, target_queued_compl_work);
}
dev->se_hba = hba;
@@ -154,6 +154,7 @@ bool target_check_wce(struct se_device *dev);
bool target_check_fua(struct se_device *dev);
void __target_execute_cmd(struct se_cmd *, bool);
void target_queued_submit_work(struct work_struct *work);
+void target_queued_compl_work(struct work_struct *work);
/* target_core_stat.c */
void target_stat_setup_dev_default_groups(struct se_device *);
@@ -55,7 +55,6 @@ static void transport_complete_task_attr(struct se_cmd *cmd);
static void translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason);
static void transport_handle_queue_full(struct se_cmd *cmd,
struct se_device *dev, int err, bool write_pending);
-static void target_complete_ok_work(struct work_struct *work);
int init_se_kmem_caches(void)
{
@@ -732,14 +731,6 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd)
percpu_ref_put(&lun->lun_ref);
}
-static void target_complete_failure_work(struct work_struct *work)
-{
- struct se_cmd *cmd = container_of(work, struct se_cmd, work);
-
- transport_generic_request_failure(cmd,
- TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
-}
-
/*
* Used when asking transport to copy Sense Data from the underlying
* Linux/SCSI struct scsi_cmnd
@@ -827,11 +818,20 @@ static void target_handle_abort(struct se_cmd *cmd)
transport_cmd_check_stop_to_fabric(cmd);
}
-static void target_abort_work(struct work_struct *work)
+static void target_queue_cmd_work(struct se_cmd_queue *q, struct se_cmd *se_cmd,
+ int cpu, struct workqueue_struct *wq)
{
- struct se_cmd *cmd = container_of(work, struct se_cmd, work);
+ llist_add(&se_cmd->se_cmd_list, &q->cmd_list); /* link the cmd onto @q's list */
+ queue_work_on(cpu, wq, &q->work); /* queue @q's work item on @cpu using @wq */
+}
- target_handle_abort(cmd);
+static void target_queue_cmd_compl(struct se_cmd *se_cmd)
+{
+ struct se_device *se_dev = se_cmd->se_dev;
+ int cpu = se_cmd->cpuid; /* picks the device queue and the CPU the work is queued on */
+
+ target_queue_cmd_work(&se_dev->queues[cpu].cq, se_cmd, cpu,
+ target_completion_wq); /* completion list (cq) + completion workqueue */
}
static bool target_cmd_interrupted(struct se_cmd *cmd)
@@ -841,8 +841,8 @@ static bool target_cmd_interrupted(struct se_cmd *cmd)
if (cmd->transport_state & CMD_T_ABORTED) {
if (cmd->transport_complete_callback)
cmd->transport_complete_callback(cmd, false, &post_ret);
- INIT_WORK(&cmd->work, target_abort_work);
- queue_work(target_completion_wq, &cmd->work);
+
+ target_queue_cmd_compl(cmd);
return true;
} else if (cmd->transport_state & CMD_T_STOP) {
if (cmd->transport_complete_callback)
@@ -857,7 +857,6 @@ static bool target_cmd_interrupted(struct se_cmd *cmd)
/* May be called from interrupt context so must not sleep. */
void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
{
- int success;
unsigned long flags;
if (target_cmd_interrupted(cmd))
@@ -866,25 +865,11 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
cmd->scsi_status = scsi_status;
spin_lock_irqsave(&cmd->t_state_lock, flags);
- switch (cmd->scsi_status) {
- case SAM_STAT_CHECK_CONDITION:
- if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
- success = 1;
- else
- success = 0;
- break;
- default:
- success = 1;
- break;
- }
-
cmd->t_state = TRANSPORT_COMPLETE;
cmd->transport_state |= (CMD_T_COMPLETE | CMD_T_ACTIVE);
spin_unlock_irqrestore(&cmd->t_state_lock, flags);
- INIT_WORK(&cmd->work, success ? target_complete_ok_work :
- target_complete_failure_work);
- queue_work_on(cmd->cpuid, target_completion_wq, &cmd->work);
+ target_queue_cmd_compl(cmd);
}
EXPORT_SYMBOL(target_complete_cmd);
@@ -1894,13 +1879,6 @@ void target_queued_submit_work(struct work_struct *work)
target_unplug_device(se_plug);
}
-static void target_queue_cmd_work(struct se_cmd_queue *q, struct se_cmd *se_cmd,
- int cpu)
-{
- llist_add(&se_cmd->se_cmd_list, &q->cmd_list);
- queue_work_on(cpu, target_submission_wq, &q->work);
-}
-
/**
* target_queue_cmd_submit - queue the cmd to run on the LIO workqueue
* @se_cmd: command descriptor to submit
@@ -1951,7 +1929,8 @@ target_queue_cmd_submit(struct se_cmd *se_cmd, struct se_session *se_sess,
cpu = se_cmd->cpuid;
se_dev = se_cmd->se_dev;
- target_queue_cmd_work(&se_dev->queues[cpu].sq, se_cmd, cpu);
+ target_queue_cmd_work(&se_dev->queues[cpu].sq, se_cmd, cpu,
+ target_submission_wq);
return 0;
}
EXPORT_SYMBOL_GPL(target_queue_cmd_submit);
@@ -2054,8 +2033,7 @@ void transport_generic_request_failure(struct se_cmd *cmd,
cmd->transport_complete_callback(cmd, false, &post_ret);
if (cmd->transport_state & CMD_T_ABORTED) {
- INIT_WORK(&cmd->work, target_abort_work);
- queue_work(target_completion_wq, &cmd->work);
+ target_queue_cmd_compl(cmd);
return;
}
@@ -2480,10 +2458,32 @@ static bool target_read_prot_action(struct se_cmd *cmd)
return false;
}
-static void target_complete_ok_work(struct work_struct *work)
+static void target_complete_cmd_work(struct se_cmd *cmd)
{
- struct se_cmd *cmd = container_of(work, struct se_cmd, work);
- int ret;
+ int ret, success;
+
+ if (cmd->transport_state & CMD_T_ABORTED) {
+ target_handle_abort(cmd);
+ return;
+ }
+
+ switch (cmd->scsi_status) {
+ case SAM_STAT_CHECK_CONDITION:
+ if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
+ success = 1;
+ else
+ success = 0;
+ break;
+ default:
+ success = 1;
+ break;
+ }
+
+ if (!success) {
+ transport_generic_request_failure(cmd,
+ TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
+ return;
+ }
/*
* Check if we need to move delayed/dormant tasks from cmds on the
@@ -2625,6 +2625,18 @@ static void target_complete_ok_work(struct work_struct *work)
transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
}
+void target_queued_compl_work(struct work_struct *work)
+{
+ struct se_cmd_queue *cq = container_of(work, struct se_cmd_queue,
+ work);
+ struct se_cmd *se_cmd, *next_cmd;
+ struct llist_node *cmd_list;
+
+ cmd_list = llist_del_all(&cq->cmd_list); /* take everything queued on this cq so far */
+ llist_for_each_entry_safe(se_cmd, next_cmd, cmd_list, se_cmd_list) /* NOTE(review): llist looks LIFO, so cmds run newest-first - confirm OK */
+ target_complete_cmd_work(se_cmd); /* handles the abort, failure and normal completion cases */
+}
+
void target_free_sgl(struct scatterlist *sgl, int nents)
{
sgl_free_n_order(sgl, nents, 0);
@@ -777,6 +777,7 @@ struct se_device_queue {
struct list_head state_list;
spinlock_t lock;
struct se_cmd_queue sq;
+ struct se_cmd_queue cq;
};
struct se_device {
Doing a work per cmd can lead to lots of threads being created. This patch
just replaces the per-cmd completion work with a per-CPU list.

Combined with the first patches, this allows tcm_loop on top of initiators
like iSER to go from around 700K IOPS to 1000K, and it reduces the number of
threads that get created when the system is under heavy load and hitting the
initiator drivers' tagging limits.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/target/target_core_device.c    |  3 +
 drivers/target/target_core_internal.h  |  1 +
 drivers/target/target_core_transport.c | 98 +++++++++++++++-----------
 include/target/target_core_base.h      |  1 +
 4 files changed, 60 insertions(+), 43 deletions(-)