
[4/5] nvme-tcp: support separate queue maps for read and write

Message ID 20181211104936.25333-5-sagi@grimberg.me (mailing list archive)
State New, archived
Series: implement nvmf read/write queue maps

Commit Message

Sagi Grimberg Dec. 11, 2018, 10:49 a.m. UTC
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/tcp.c | 53 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 6 deletions(-)

Comments

Christoph Hellwig Dec. 11, 2018, 1:41 p.m. UTC | #1
On Tue, Dec 11, 2018 at 02:49:34AM -0800, Sagi Grimberg wrote:
> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> ---
>  drivers/nvme/host/tcp.c | 53 ++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 47 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 15543358e245..5c0ba99fb105 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -1215,7 +1215,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
>  	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
>  	struct nvme_tcp_queue *queue = &ctrl->queues[qid];
>  	struct linger sol = { .l_onoff = 1, .l_linger = 0 };
> -	int ret, opt, rcv_pdu_size;
> +	int ret, opt, rcv_pdu_size, n;
>  
>  	queue->ctrl = ctrl;
>  	INIT_LIST_HEAD(&queue->send_list);
> @@ -1271,7 +1271,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
>  	}
>  
>  	queue->sock->sk->sk_allocation = GFP_ATOMIC;
> -	queue->io_cpu = (qid == 0) ? 0 : qid - 1;
> +	n = (qid ? qid - 1 : 0) % num_online_cpus();
> +	queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
>  	queue->request = NULL;
>  	queue->data_remaining = 0;
>  	queue->ddgst_remaining = 0;
> @@ -1433,6 +1434,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
>  		set->driver_data = ctrl;
>  		set->nr_hw_queues = nctrl->queue_count - 1;
>  		set->timeout = NVME_IO_TIMEOUT;
> +		set->nr_maps = 2 /* default + read */;
>  	}
>  
>  	ret = blk_mq_alloc_tag_set(set);
> @@ -1527,7 +1529,12 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
>  
>  static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
>  {
> -	return min(ctrl->queue_count - 1, num_online_cpus());
> +	unsigned int nr_io_queues;
> +
> +	nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus());
> +	nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus());
> +
> +	return nr_io_queues;
>  }
>  
>  static int nvme_alloc_io_queues(struct nvme_ctrl *ctrl)
> @@ -2052,6 +2059,38 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	return BLK_STS_OK;
>  }
>  
> +static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
> +{
> +	struct nvme_tcp_ctrl *ctrl = set->driver_data;
> +	struct blk_mq_queue_map *map;
> +
> +	if (ctrl->ctrl.opts->nr_write_queues) {
> +		/* separate read/write queues */
> +		map = &set->map[HCTX_TYPE_DEFAULT];
> +		map->queue_offset = 0;
> +		map->nr_queues = ctrl->ctrl.opts->nr_write_queues;
> +		blk_mq_map_queues(map);

Shouldn't this use nr_io_queues?

> +		map = &set->map[HCTX_TYPE_READ];
> +		map->nr_queues = ctrl->ctrl.opts->nr_io_queues;
> +		map->queue_offset = ctrl->ctrl.opts->nr_write_queues;
> +		blk_mq_map_queues(map);
> +	} else {
> +		/* mixed read/write queues */
> +		map = &set->map[HCTX_TYPE_DEFAULT];
> +		map->queue_offset = 0;
> +		map->nr_queues = ctrl->ctrl.opts->nr_io_queues;
> +		blk_mq_map_queues(map);
> +
> +		map = &set->map[HCTX_TYPE_READ];
> +		map->queue_offset = 0;
> +		map->nr_queues = ctrl->ctrl.opts->nr_io_queues;
> +		blk_mq_map_queues(map);

Also I find the reused local map variable a little odd and not helpful
for readability.  What about something like:

static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
{
	struct nvme_tcp_ctrl *ctrl = set->driver_data;

	set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
	set->map[HCTX_TYPE_DEFAULT].nr_queues = ctrl->ctrl.opts->nr_io_queues;
	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);

	if (ctrl->ctrl.opts->nr_write_queues) {
		/* separate read/write queues */
		set->map[HCTX_TYPE_READ].queue_offset =
				ctrl->ctrl.opts->nr_io_queues;
		set->map[HCTX_TYPE_READ].nr_queues =
				ctrl->ctrl.opts->nr_write_queues;
	} else {
		/* mixed read/write queues */
		set->map[HCTX_TYPE_READ].queue_offset = 0;
		set->map[HCTX_TYPE_READ].nr_queues =
				ctrl->ctrl.opts->nr_io_queues;
	}
	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
	return 0;
}

Sagi Grimberg Dec. 11, 2018, 11:11 p.m. UTC | #2
>> +static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
>> +{
>> +	struct nvme_tcp_ctrl *ctrl = set->driver_data;
>> +	struct blk_mq_queue_map *map;
>> +
>> +	if (ctrl->ctrl.opts->nr_write_queues) {
>> +		/* separate read/write queues */
>> +		map = &set->map[HCTX_TYPE_DEFAULT];
>> +		map->queue_offset = 0;
>> +		map->nr_queues = ctrl->ctrl.opts->nr_write_queues;
>> +		blk_mq_map_queues(map);
> 
> Shouldn't this use nr_io_queues?

The intent is that HCTX_TYPE_READ will always use nr_io_queues and
HCTX_TYPE_DEFAULT will use nr_write_queues.. I'll document that
in the change log.

> Also I find the reused local map variable a little odd and not helpful
> for readability.  What about something like:
> 
> static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
> {
> 	struct nvme_tcp_ctrl *ctrl = set->driver_data;
> 
> 	set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
> 	set->map[HCTX_TYPE_DEFAULT].nr_queues = ctrl->ctrl.opts->nr_io_queues;
> 	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
> 
> 	if (ctrl->ctrl.opts->nr_write_queues) {
> 		/* separate read/write queues */
> 		set->map[HCTX_TYPE_READ].queue_offset =
> 				ctrl->ctrl.opts->nr_io_queues;
> 		set->map[HCTX_TYPE_READ].nr_queues =
> 				ctrl->ctrl.opts->nr_write_queues;
> 	} else {
> 		/* mixed read/write queues */
> 		set->map[HCTX_TYPE_READ].queue_offset = 0;
> 		set->map[HCTX_TYPE_READ].nr_queues =
> 				ctrl->ctrl.opts->nr_io_queues;
> 	}
> 	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
> 	return 0;
> }

That's better.. I'll update the patch with a change according to my
note above..

Patch

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 15543358e245..5c0ba99fb105 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1215,7 +1215,7 @@  static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
 	struct nvme_tcp_queue *queue = &ctrl->queues[qid];
 	struct linger sol = { .l_onoff = 1, .l_linger = 0 };
-	int ret, opt, rcv_pdu_size;
+	int ret, opt, rcv_pdu_size, n;
 
 	queue->ctrl = ctrl;
 	INIT_LIST_HEAD(&queue->send_list);
@@ -1271,7 +1271,8 @@  static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 	}
 
 	queue->sock->sk->sk_allocation = GFP_ATOMIC;
-	queue->io_cpu = (qid == 0) ? 0 : qid - 1;
+	n = (qid ? qid - 1 : 0) % num_online_cpus();
+	queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
 	queue->request = NULL;
 	queue->data_remaining = 0;
 	queue->ddgst_remaining = 0;
@@ -1433,6 +1434,7 @@  static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->driver_data = ctrl;
 		set->nr_hw_queues = nctrl->queue_count - 1;
 		set->timeout = NVME_IO_TIMEOUT;
+		set->nr_maps = 2 /* default + read */;
 	}
 
 	ret = blk_mq_alloc_tag_set(set);
@@ -1527,7 +1529,12 @@  static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 
 static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
 {
-	return min(ctrl->queue_count - 1, num_online_cpus());
+	unsigned int nr_io_queues;
+
+	nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus());
+	nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus());
+
+	return nr_io_queues;
 }
 
 static int nvme_alloc_io_queues(struct nvme_ctrl *ctrl)
@@ -2052,6 +2059,38 @@  static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_STS_OK;
 }
 
+static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
+{
+	struct nvme_tcp_ctrl *ctrl = set->driver_data;
+	struct blk_mq_queue_map *map;
+
+	if (ctrl->ctrl.opts->nr_write_queues) {
+		/* separate read/write queues */
+		map = &set->map[HCTX_TYPE_DEFAULT];
+		map->queue_offset = 0;
+		map->nr_queues = ctrl->ctrl.opts->nr_write_queues;
+		blk_mq_map_queues(map);
+
+		map = &set->map[HCTX_TYPE_READ];
+		map->nr_queues = ctrl->ctrl.opts->nr_io_queues;
+		map->queue_offset = ctrl->ctrl.opts->nr_write_queues;
+		blk_mq_map_queues(map);
+	} else {
+		/* mixed read/write queues */
+		map = &set->map[HCTX_TYPE_DEFAULT];
+		map->queue_offset = 0;
+		map->nr_queues = ctrl->ctrl.opts->nr_io_queues;
+		blk_mq_map_queues(map);
+
+		map = &set->map[HCTX_TYPE_READ];
+		map->queue_offset = 0;
+		map->nr_queues = ctrl->ctrl.opts->nr_io_queues;
+		blk_mq_map_queues(map);
+	}
+
+	return 0;
+}
+
 static struct blk_mq_ops nvme_tcp_mq_ops = {
 	.queue_rq	= nvme_tcp_queue_rq,
 	.complete	= nvme_complete_rq,
@@ -2059,6 +2098,7 @@  static struct blk_mq_ops nvme_tcp_mq_ops = {
 	.exit_request	= nvme_tcp_exit_request,
 	.init_hctx	= nvme_tcp_init_hctx,
 	.timeout	= nvme_tcp_timeout,
+	.map_queues	= nvme_tcp_map_queues,
 };
 
 static struct blk_mq_ops nvme_tcp_admin_mq_ops = {
@@ -2113,7 +2153,7 @@  static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
 
 	INIT_LIST_HEAD(&ctrl->list);
 	ctrl->ctrl.opts = opts;
-	ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
+	ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + 1;
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
 	ctrl->ctrl.kato = opts->kato;
 
@@ -2155,7 +2195,7 @@  static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
 		goto out_free_ctrl;
 	}
 
-	ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues),
+	ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
 				GFP_KERNEL);
 	if (!ctrl->queues) {
 		ret = -ENOMEM;
@@ -2206,7 +2246,8 @@  static struct nvmf_transport_ops nvme_tcp_transport = {
 	.required_opts	= NVMF_OPT_TRADDR,
 	.allowed_opts	= NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
 			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
-			  NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST,
+			  NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
+			  NVMF_OPT_NR_IO_QUEUES,
 	.create_ctrl	= nvme_tcp_create_ctrl,
 };