@@ -486,9 +486,13 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
if (!blk_mq_hw_queue_mapped(data.hctx))
goto out_queue_exit;
- WARN_ON_ONCE(blk_mq_hctx_use_managed_irq(data.hctx));
-
- cpu = blk_mq_first_mapped_cpu(data.hctx);
+ if (blk_mq_hctx_use_managed_irq(data.hctx)) {
+ cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
+ if (cpu >= nr_cpu_ids)
+ goto out_queue_exit;
+ } else {
+ cpu = blk_mq_first_mapped_cpu(data.hctx);
+ }
data.ctx = __blk_mq_get_ctx(q, cpu);
if (!q->elevator)
@@ -2841,6 +2841,17 @@ nvme_fc_complete_rq(struct request *rq)
nvme_fc_ctrl_put(ctrl);
}
+static int
+nvme_fc_map_queues(struct blk_mq_tag_set *set)
+{
+ struct nvme_fc_ctrl *ctrl = set->driver_data;
+
+ if (ctrl->lport->ops->map_queues)
+ return ctrl->lport->ops->map_queues(&ctrl->lport->localport, set);
+
+ return blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
+}
+
static const struct blk_mq_ops nvme_fc_mq_ops = {
.queue_rq = nvme_fc_queue_rq,
@@ -2849,6 +2860,7 @@ static const struct blk_mq_ops nvme_fc_mq_ops = {
.exit_request = nvme_fc_exit_request,
.init_hctx = nvme_fc_init_hctx,
.timeout = nvme_fc_timeout,
+ .map_queues = nvme_fc_map_queues,
};
static int
@@ -3392,6 +3404,7 @@ static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
.exit_request = nvme_fc_exit_request,
.init_hctx = nvme_fc_init_admin_hctx,
.timeout = nvme_fc_timeout,
+ .map_queues = nvme_fc_map_queues,
};
@@ -667,6 +667,19 @@ static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport)
complete(&fcport->nvme_del_done);
}
+static int qla_nvme_map_queues(struct nvme_fc_local_port *lport,
+ struct blk_mq_tag_set *set)
+{
+
+ struct blk_mq_queue_map *qmap = &set->map[HCTX_TYPE_DEFAULT];
+ int ret;
+
+ ret = blk_mq_map_queues(qmap);
+ qmap->use_managed_irq = true;
+
+ return ret;
+}
+
static struct nvme_fc_port_template qla_nvme_fc_transport = {
.localport_delete = qla_nvme_localport_delete,
.remoteport_delete = qla_nvme_remoteport_delete,
@@ -676,6 +689,7 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = {
.ls_abort = qla_nvme_ls_abort,
.fcp_io = qla_nvme_post_cmd,
.fcp_abort = qla_nvme_fcp_abort,
+ .map_queues = qla_nvme_map_queues,
.max_hw_queues = 8,
.max_sgl_segments = 1024,
.max_dif_sgl_segments = 64,
@@ -7914,6 +7914,9 @@ static int qla2xxx_map_queues(struct Scsi_Host *shost)
rc = blk_mq_map_queues(qmap);
else
rc = blk_mq_pci_map_queues(qmap, vha->hw->pdev, vha->irq_offset);
+
+ qmap->use_managed_irq = true;
+
return rc;
}
@@ -471,6 +471,8 @@ struct nvme_fc_remote_port {
* specified by the fcp_request->private pointer.
* Value is Mandatory. Allowed to be zero.
*/
+struct blk_mq_tag_set;
+
struct nvme_fc_port_template {
/* initiator-based functions */
void (*localport_delete)(struct nvme_fc_local_port *);
@@ -497,6 +499,8 @@ struct nvme_fc_port_template {
int (*xmt_ls_rsp)(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *rport,
struct nvmefc_ls_rsp *ls_rsp);
+ int (*map_queues)(struct nvme_fc_local_port *localport,
+ struct blk_mq_tag_set *set);
u32 max_hw_queues;
u16 max_sgl_segments;
nvme-fc is currently the only user of blk_mq_alloc_request_hctx(). With
the recent changes to teach the nvme subsystem to honor managed IRQs,
the assumption was that the complete nvme-fc stack doesn't use managed
IRQs. Unfortunately, the qla2xxx driver does use managed IRQs.

Add an interface for the nvme-fc drivers to update the mapping and also
to set the use_managed_irq flag. This is very ugly as we have to pass
down struct blk_mq_tag_set. I haven't found any better way so far.

Relax the requirement in blk_mq_alloc_request_hctx() that only !managed
IRQs are supported. As long as one CPU is online in the requested hctx,
all is good. If this is not the case, we return an error which allows
the upper layer to start the reconnect loop.

As the current qla2xxx already depends on managed IRQs, the main
difference with and without this patch is that we see

  nvme nvme8: Connect command failed, error wo/DNR bit: -16402
  nvme nvme8: NVME-FC{8}: reset: Reconnect attempt failed (-18)

instead of just timeouts such as

  qla2xxx [0000:81:00.0]-5032:1: ABT_IOCB: Invalid completion handle (1da) -- timed-out.

In both cases the system recovers as soon as at least one CPU is online
in every hctx. Also note, this only affects admin requests. As long as
no FC reset happens and no reconnect attempt is triggered, user space
is able to issue I/Os to the target.

Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
Hi,

I've played a bit with this patch to figure out what the impact is for
the qla2xxx driver. Basically, the situation doesn't change a lot with
Ming's patches. If we happen to run into the situation that all CPUs
are offline in one hctx and a reconnect attempt is triggered, all
traffic to the target ceases. But as soon as we have at least one CPU
online in all hctx, the system recovers. This patch just makes it a bit
more verbose (maybe a warning could be added to
blk_mq_alloc_request_hctx()).

Thanks,
Daniel

 block/blk-mq.c                  | 10 +++++++---
 drivers/nvme/host/fc.c          | 13 +++++++++++++
 drivers/scsi/qla2xxx/qla_nvme.c | 14 ++++++++++++++
 drivers/scsi/qla2xxx/qla_os.c   |  3 +++
 include/linux/nvme-fc-driver.h  |  4 ++++
 5 files changed, 41 insertions(+), 3 deletions(-)
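
A note for anyone reading this without Ming's series applied:
blk_mq_hctx_use_managed_irq() and the use_managed_irq flag referenced
above come from that prerequisite series and are not introduced here.
The assumption is that the helper simply reports the flag which the
driver's ->map_queues callback set on the queue map, roughly along
these lines (a sketch only, the real helper in the prerequisite series
may differ):

/*
 * Sketch of the assumed prerequisite helper, not part of this patch:
 * report whether the queue map backing this hctx was marked as using
 * managed IRQs by the driver's ->map_queues callback.
 */
static inline bool blk_mq_hctx_use_managed_irq(struct blk_mq_hw_ctx *hctx)
{
	return hctx->queue->tag_set->map[hctx->type].use_managed_irq;
}

Also note that the new nvme_fc_port_template ->map_queues callback is
optional: an LLDD that does not use managed IRQs can leave it NULL and
nvme_fc_map_queues() falls back to blk_mq_map_queues() as before.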