@@ -607,6 +607,11 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
else
dev_info(ctrl->ctrl.device,
"failed to connect queue: %d ret=%d\n", idx, ret);
+
+ if (idx > ctrl->ctrl.opts->nr_io_queues +
+ ctrl->ctrl.opts->nr_write_queues)
+ ib_change_cq_ctx(ctrl->queues[idx].ib_cq, IB_POLL_DIRECT);
+
return ret;
}
@@ -646,6 +651,7 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
ibdev->num_comp_vectors);
nr_io_queues += min(opts->nr_write_queues, num_online_cpus());
+ nr_io_queues += min(opts->nr_poll_queues, num_online_cpus());
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
if (ret)
@@ -716,7 +722,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
- set->nr_maps = 2 /* default + read */;
+ set->nr_maps = HCTX_MAX_TYPES;
}
ret = blk_mq_alloc_tag_set(set);
@@ -1742,6 +1748,14 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_IOERR;
}
+static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
+{
+ struct nvme_rdma_queue *queue = hctx->driver_data;
+ struct ib_cq *cq = queue->ib_cq;
+
+ return ib_process_cq_direct(cq, 16);
+}
+
static void nvme_rdma_complete_rq(struct request *rq)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
@@ -1772,6 +1786,21 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
ctrl->device->dev, 0);
blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
ctrl->device->dev, 0);
+
+ if (ctrl->ctrl.opts->nr_poll_queues) {
+ set->map[HCTX_TYPE_POLL].nr_queues =
+ ctrl->ctrl.opts->nr_poll_queues;
+ set->map[HCTX_TYPE_POLL].queue_offset =
+ ctrl->ctrl.opts->nr_io_queues;
+ if (ctrl->ctrl.opts->nr_write_queues)
+ set->map[HCTX_TYPE_POLL].queue_offset +=
+ ctrl->ctrl.opts->nr_write_queues;
+ } else {
+ set->map[HCTX_TYPE_POLL].nr_queues =
+ ctrl->ctrl.opts->nr_io_queues;
+ set->map[HCTX_TYPE_POLL].queue_offset = 0;
+ }
+ blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
return 0;
}
@@ -1783,6 +1812,7 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
.init_hctx = nvme_rdma_init_hctx,
.timeout = nvme_rdma_timeout,
.map_queues = nvme_rdma_map_queues,
+ .poll = nvme_rdma_poll,
};
static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
@@ -1927,7 +1957,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
- ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + 1;
+ ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
+ opts->nr_poll_queues + 1;
ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato;
Every nvmf queue starts with a connect message that is the slow path
at setup time, and there is no need for polling (it is actually hurtful).
Instead, allocate the polling queue cq with IB_POLL_SOFTIRQ and switch
it to IB_POLL_DIRECT where it makes sense.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/rdma.c | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)
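
For context, here is a minimal sketch (not part of the patch) of the CQ
poll-context lifecycle the changelog describes. nvme_rdma_queue_is_poll()
and nvme_rdma_switch_to_direct_poll() are hypothetical helpers used only
for illustration (the patch open-codes the index check directly in
nvme_rdma_start_queue()), and ib_change_cq_ctx() is the new helper the
patch relies on rather than an existing ib_core API.

static bool nvme_rdma_queue_is_poll(struct nvme_rdma_ctrl *ctrl, int idx)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;

	/* poll queues are allocated after the default/read and write queues */
	return idx > opts->nr_io_queues + opts->nr_write_queues;
}

static void nvme_rdma_switch_to_direct_poll(struct nvme_rdma_ctrl *ctrl,
		int idx)
{
	/*
	 * The queue CQ is allocated with IB_POLL_SOFTIRQ, as all nvme-rdma
	 * CQs are today, so the fabrics connect command completes without
	 * anyone having to poll for it.  Once connect is done, a dedicated
	 * poll queue only sees polled I/O, so flip its CQ to IB_POLL_DIRECT;
	 * from then on its completions are reaped exclusively by
	 * ib_process_cq_direct() from nvme_rdma_poll().
	 */
	if (nvme_rdma_queue_is_poll(ctrl, idx))
		ib_change_cq_ctx(ctrl->queues[idx].ib_cq, IB_POLL_DIRECT);
}

With nr_poll_queues set, blk-mq steers polled (HIPRI) requests to the
HCTX_TYPE_POLL map that nvme_rdma_map_queues() now populates, so only the
dedicated poll queues end up with a CQ running in IB_POLL_DIRECT mode.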