
[05/11] vhost scsi: use lio wq cmd submission helper

Message ID 20210204113513.93204-6-michael.christie@oracle.com (mailing list archive)
State New, archived
Series: target: fix cmd plugging and completion

Commit Message

Mike Christie Feb. 4, 2021, 11:35 a.m. UTC
Convert vhost-scsi to use the lio wq cmd submission helper.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/vhost/scsi.c | 35 +++++++----------------------------
 1 file changed, 7 insertions(+), 28 deletions(-)

Comments

Michael S. Tsirkin Feb. 5, 2021, 4:17 p.m. UTC | #1
On Thu, Feb 04, 2021 at 05:35:07AM -0600, Mike Christie wrote:
> @@ -1132,14 +1127,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
>  		 * vhost_scsi_queue_data_in() and vhost_scsi_queue_status()
>  		 */
>  		cmd->tvc_vq_desc = vc.head;
> -		/*
> -		 * Dispatch cmd descriptor for cmwq execution in process
> -		 * context provided by vhost_scsi_workqueue.  This also ensures
> -		 * cmd is executed on the same kworker CPU as this vhost
> -		 * thread to gain positive L2 cache locality effects.
> -		 */
> -		INIT_WORK(&cmd->work, vhost_scsi_submission_work);
> -		queue_work(vhost_scsi_workqueue, &cmd->work);
> +		target_queue_cmd_submit(tpg->tpg_nexus->tvn_se_sess,
> +					&cmd->tvc_se_cmd);
>  		ret = 0;
>  err:
>  		/*

What about this aspect? Will things still stay on the same CPU?
Mike Christie Feb. 5, 2021, 5:38 p.m. UTC | #2
On 2/5/21 10:17 AM, Michael S. Tsirkin wrote:
> On Thu, Feb 04, 2021 at 05:35:07AM -0600, Mike Christie wrote:
>> @@ -1132,14 +1127,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
>>  		 * vhost_scsi_queue_data_in() and vhost_scsi_queue_status()
>>  		 */
>>  		cmd->tvc_vq_desc = vc.head;
>> -		/*
>> -		 * Dispatch cmd descriptor for cmwq execution in process
>> -		 * context provided by vhost_scsi_workqueue.  This also ensures
>> -		 * cmd is executed on the same kworker CPU as this vhost
>> -		 * thread to gain positive L2 cache locality effects.
>> -		 */
>> -		INIT_WORK(&cmd->work, vhost_scsi_submission_work);
>> -		queue_work(vhost_scsi_workqueue, &cmd->work);
>> +		target_queue_cmd_submit(tpg->tpg_nexus->tvn_se_sess,
>> +					&cmd->tvc_se_cmd);
>>  		ret = 0;
>>  err:
>>  		/*
> 
> What about this aspect? Will things still stay on the same CPU?
Yes, if that is what it's configured to do.

On the submission path there is no change in behavior. target_queue_cmd_submit
uses queue_work_on, so the cmd is executed on the same CPU in LIO. Once
LIO passes it to the block layer, that layer does whatever it is set up to do.
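
To illustrate, a minimal sketch of that pattern (hypothetical names;
not the actual target core code):

/*
 * Hypothetical sketch: queue_work_on() pins the work to the CPU the
 * submitter is currently running on, so a kworker on that same CPU
 * executes the cmd. This is the behavior target_queue_cmd_submit
 * relies on.
 */
#include <linux/smp.h>
#include <linux/workqueue.h>

static void example_submit_on_local_cpu(struct workqueue_struct *wq,
					struct work_struct *work)
{
	/* queue on the submitting CPU instead of letting the wq pick one */
	queue_work_on(raw_smp_processor_id(), wq, work);
}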

On the completion path the lower levels work the same. The low level
driver goes by its ISR/softirq/completion-thread settings, and the block
layer then goes by queue settings like rq_affinity.

The change in behavior is that LIO will now do what the layer below it
was configured to do, instead of always trying to complete on the same
CPU the cmd was submitted on.
Mike Christie Feb. 5, 2021, 6:04 p.m. UTC | #3
On 2/5/21 11:38 AM, Mike Christie wrote:
> On 2/5/21 10:17 AM, Michael S. Tsirkin wrote:
>> On Thu, Feb 04, 2021 at 05:35:07AM -0600, Mike Christie wrote:
>>> @@ -1132,14 +1127,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
>>>  		 * vhost_scsi_queue_data_in() and vhost_scsi_queue_status()
>>>  		 */
>>>  		cmd->tvc_vq_desc = vc.head;
>>> -		/*
>>> -		 * Dispatch cmd descriptor for cmwq execution in process
>>> -		 * context provided by vhost_scsi_workqueue.  This also ensures
>>> -		 * cmd is executed on the same kworker CPU as this vhost
>>> -		 * thread to gain positive L2 cache locality effects.
>>> -		 */
>>> -		INIT_WORK(&cmd->work, vhost_scsi_submission_work);
>>> -		queue_work(vhost_scsi_workqueue, &cmd->work);
>>> +		target_queue_cmd_submit(tpg->tpg_nexus->tvn_se_sess,
>>> +					&cmd->tvc_se_cmd);
>>>  		ret = 0;
>>>  err:
>>>  		/*
>>
>> What about this aspect? Will things still stay on the same CPU?
> Yes, if that is what it's configured to do.

Oh yeah, I wasn't sure if you were only asking about the code in this
patch or about the combined patchset. The above chunk modifies the
submission code. As I wrote below, there are no CPU-usage changes in
that path.

Patch:

[PATCH 11/11] target, vhost-scsi: don't switch cpus on completion

modifies the completion path in LIO so we can complete the cmd on the
CPU that the layers below LIO were configured to complete on. The user
can then configure those layers to complete on the specific CPU the cmd
was submitted on, on just one that shares a cache with it, or on
whatever CPU the layer below the block layer completed it on.
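
Conceptually, the completion-side difference is just where the
completion work gets queued. A minimal sketch (cmd->submit_cpu and
cmd->comp_work are assumed fields, not the actual code):

static void example_queue_completion(struct vhost_scsi_cmd *cmd,
				     struct workqueue_struct *comp_wq)
{
	/* old behavior: always bounce back to the submission CPU */
	/* queue_work_on(cmd->submit_cpu, comp_wq, &cmd->comp_work); */

	/*
	 * new behavior: queue_work() on a per-CPU workqueue runs the
	 * work on the local CPU, i.e. wherever the layers below LIO
	 * delivered the completion.
	 */
	queue_work(comp_wq, &cmd->comp_work);
}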


> 
> On the submission path there is no change in behavior. target_queue_cmd_submit
> uses queue_work_on, so the cmd is executed on the same CPU in LIO. Once
> LIO passes it to the block layer, that layer does whatever it is set up to do.
> 
> On the completion path the lower levels work the same. The low level
> driver goes by its ISR/softirq/completion-thread settings, and the block
> layer then goes by queue settings like rq_affinity.
> 
> The change in behavior is that LIO will now do what the layer below it
> was configured to do, instead of always trying to complete on the same
> CPU the cmd was submitted on.
>

Patch

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 4ce9f00ae10e..aacad9e222ff 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -85,7 +85,7 @@  struct vhost_scsi_cmd {
 	/* The number of scatterlists associated with this cmd */
 	u32 tvc_sgl_count;
 	u32 tvc_prot_sgl_count;
-	/* Saved unpacked SCSI LUN for vhost_scsi_submission_work() */
+	/* Saved unpacked SCSI LUN for vhost_scsi_submit_queued_cmd() */
 	u32 tvc_lun;
 	/* Pointer to the SGL formatted memory from virtio-scsi */
 	struct scatterlist *tvc_sgl;
@@ -101,8 +101,6 @@  struct vhost_scsi_cmd {
 	struct vhost_scsi_nexus *tvc_nexus;
 	/* The TCM I/O descriptor that is accessed via container_of() */
 	struct se_cmd tvc_se_cmd;
-	/* work item used for cmwq dispatch to vhost_scsi_submission_work() */
-	struct work_struct work;
 	/* Copy of the incoming SCSI command descriptor block (CDB) */
 	unsigned char tvc_cdb[VHOST_SCSI_MAX_CDB_SIZE];
 	/* Sense buffer that will be mapped into outgoing status */
@@ -240,8 +238,6 @@  struct vhost_scsi_ctx {
 	struct iov_iter out_iter;
 };
 
-static struct workqueue_struct *vhost_scsi_workqueue;
-
 /* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */
 static DEFINE_MUTEX(vhost_scsi_mutex);
 static LIST_HEAD(vhost_scsi_list);
@@ -782,12 +778,11 @@  static int vhost_scsi_to_tcm_attr(int attr)
 	return TCM_SIMPLE_TAG;
 }
 
-static void vhost_scsi_submission_work(struct work_struct *work)
+static void vhost_scsi_submit_queued_cmd(struct se_cmd *se_cmd)
 {
 	struct vhost_scsi_cmd *cmd =
-		container_of(work, struct vhost_scsi_cmd, work);
+		container_of(se_cmd, struct vhost_scsi_cmd, tvc_se_cmd);
 	struct vhost_scsi_nexus *tv_nexus;
-	struct se_cmd *se_cmd = &cmd->tvc_se_cmd;
 	struct scatterlist *sg_ptr, *sg_prot_ptr = NULL;
 	int rc;
 
@@ -1132,14 +1127,8 @@  vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 		 * vhost_scsi_queue_data_in() and vhost_scsi_queue_status()
 		 */
 		cmd->tvc_vq_desc = vc.head;
-		/*
-		 * Dispatch cmd descriptor for cmwq execution in process
-		 * context provided by vhost_scsi_workqueue.  This also ensures
-		 * cmd is executed on the same kworker CPU as this vhost
-		 * thread to gain positive L2 cache locality effects.
-		 */
-		INIT_WORK(&cmd->work, vhost_scsi_submission_work);
-		queue_work(vhost_scsi_workqueue, &cmd->work);
+		target_queue_cmd_submit(tpg->tpg_nexus->tvn_se_sess,
+					&cmd->tvc_se_cmd);
 		ret = 0;
 err:
 		/*
@@ -2466,6 +2455,7 @@  static const struct target_core_fabric_ops vhost_scsi_ops = {
 	.queue_status			= vhost_scsi_queue_status,
 	.queue_tm_rsp			= vhost_scsi_queue_tm_rsp,
 	.aborted_task			= vhost_scsi_aborted_task,
+	.submit_queued_cmd		= vhost_scsi_submit_queued_cmd,
 	/*
 	 * Setup callers for generic logic in target_core_fabric_configfs.c
 	 */
@@ -2489,17 +2479,9 @@  static int __init vhost_scsi_init(void)
 		" on "UTS_RELEASE"\n", VHOST_SCSI_VERSION, utsname()->sysname,
 		utsname()->machine);
 
-	/*
-	 * Use our own dedicated workqueue for submitting I/O into
-	 * target core to avoid contention within system_wq.
-	 */
-	vhost_scsi_workqueue = alloc_workqueue("vhost_scsi", 0, 0);
-	if (!vhost_scsi_workqueue)
-		goto out;
-
 	ret = vhost_scsi_register();
 	if (ret < 0)
-		goto out_destroy_workqueue;
+		goto out;
 
 	ret = target_register_template(&vhost_scsi_ops);
 	if (ret < 0)
@@ -2509,8 +2491,6 @@  static int __init vhost_scsi_init(void)
 
 out_vhost_scsi_deregister:
 	vhost_scsi_deregister();
-out_destroy_workqueue:
-	destroy_workqueue(vhost_scsi_workqueue);
 out:
 	return ret;
 };
@@ -2519,7 +2499,6 @@  static void vhost_scsi_exit(void)
 {
 	target_unregister_template(&vhost_scsi_ops);
 	vhost_scsi_deregister();
-	destroy_workqueue(vhost_scsi_workqueue);
 };
 
 MODULE_DESCRIPTION("VHOST_SCSI series fabric driver");