
[v2,07/10] qla2xxx: Observed call trace in smp_processor_id()

Message ID: 20230807120958.3730-8-njavali@marvell.com
State: Superseded
Series: qla2xxx driver misc features

Commit Message

Nilesh Javali Aug. 7, 2023, 12:09 p.m. UTC
From: Bikash Hazarika <bhazarika@marvell.com>

The following call trace was observed:

localhost kernel: nvme nvme0: NVME-FC{0}: controller connect complete
localhost kernel: BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u129:4/75092
localhost kernel: nvme nvme0: NVME-FC{0}: new ctrl: NQN "nqn.1992-08.com.netapp:sn.b42d198afb4d11ecad6d00a098d6abfa:subsystem.PR_Channel2022_RH84_subsystem_291"
localhost kernel: caller is qla_nvme_post_cmd+0x216/0x1380 [qla2xxx]
localhost kernel: CPU: 6 PID: 75092 Comm: kworker/u129:4 Kdump: loaded Tainted: G    B   W  OE    --------- ---  5.14.0-70.22.1.el9_0.x86_64+debug #1
localhost kernel: Hardware name: HPE ProLiant XL420 Gen10/ProLiant XL420 Gen10, BIOS U39 01/13/2022
localhost kernel: Workqueue: nvme-wq nvme_async_event_work [nvme_core]
localhost kernel: Call Trace:
localhost kernel: dump_stack_lvl+0x57/0x7d
localhost kernel: check_preemption_disabled+0xc8/0xd0
localhost kernel: qla_nvme_post_cmd+0x216/0x1380 [qla2xxx]

Use the raw_smp_processor_id() API instead of smp_processor_id().

Cc: stable@vger.kernel.org
Signed-off-by: Bikash Hazarika <bhazarika@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
---
 drivers/scsi/qla2xxx/qla_inline.h  | 2 +-
 drivers/scsi/qla2xxx/qla_isr.c     | 6 +++---
 drivers/scsi/qla2xxx/qla_target.c  | 2 +-
 drivers/scsi/qla2xxx/tcm_qla2xxx.c | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)
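
A simplified sketch of why smp_processor_id() trips the check above when
CONFIG_DEBUG_PREEMPT is enabled (paraphrased from include/linux/smp.h and
lib/smp_processor_id.c; simplified, not verbatim kernel code):

/* With CONFIG_DEBUG_PREEMPT, smp_processor_id() routes through a debug
 * helper that warns when the caller can still migrate to another CPU. */
#ifdef CONFIG_DEBUG_PREEMPT
# define smp_processor_id() debug_smp_processor_id()
#else
# define smp_processor_id() raw_smp_processor_id()
#endif

unsigned int debug_smp_processor_id(void)
{
        /* Prints the "BUG: using smp_processor_id() in preemptible"
         * message seen in the trace when preemption is enabled and the
         * task is not pinned to a single CPU, then still returns the
         * current CPU id. */
        return check_preemption_disabled("smp_processor_id", "");
}

/* raw_smp_processor_id() skips the check entirely; the value it returns
 * may be stale as soon as the caller is preempted and migrated. */

The patch silences that check at the call sites below rather than disabling
preemption around them; the review that follows questions whether each call
site can actually tolerate a stale CPU id.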

Comments

John Garry Aug. 7, 2023, 1:06 p.m. UTC | #1
On 07/08/2023 13:09, Nilesh Javali wrote:
> From: Bikash Hazarika <bhazarika@marvell.com>
> 
> Following Call Trace was observed,
> 
> localhost kernel: nvme nvme0: NVME-FC{0}: controller connect complete
> localhost kernel: BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u129:4/75092
> localhost kernel: nvme nvme0: NVME-FC{0}: new ctrl: NQN "nqn.1992-08.com.netapp:sn.b42d198afb4d11ecad6d00a098d6abfa:subsystem.PR_Channel2022_RH84_subsystem_291"
> localhost kernel: caller is qla_nvme_post_cmd+0x216/0x1380 [qla2xxx]
> localhost kernel: CPU: 6 PID: 75092 Comm: kworker/u129:4 Kdump: loaded Tainted: G    B   W  OE    --------- ---  5.14.0-70.22.1.el9_0.x86_64+debug #1
> localhost kernel: Hardware name: HPE ProLiant XL420 Gen10/ProLiant XL420 Gen10, BIOS U39 01/13/2022
> localhost kernel: Workqueue: nvme-wq nvme_async_event_work [nvme_core]
> localhost kernel: Call Trace:
> localhost kernel: dump_stack_lvl+0x57/0x7d
> localhost kernel: check_preemption_disabled+0xc8/0xd0
> localhost kernel: qla_nvme_post_cmd+0x216/0x1380 [qla2xxx]
> 
> Use raw_smp_processor_id api instead of smp_processor_id

This patch changes 7 instances where smp_processor_id() is used,
without a reasonable justification for all of them.

Furthermore, for the instance where the call trace is reported above,
there is no mention of why it is indeed safe to use
raw_smp_processor_id() and why the warning from smp_processor_id() can
be ignored.

> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Bikash Hazarika <bhazarika@marvell.com>
> Signed-off-by: Nilesh Javali <njavali@marvell.com>
> Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
> ---
>   drivers/scsi/qla2xxx/qla_inline.h  | 2 +-
>   drivers/scsi/qla2xxx/qla_isr.c     | 6 +++---
>   drivers/scsi/qla2xxx/qla_target.c  | 2 +-
>   drivers/scsi/qla2xxx/tcm_qla2xxx.c | 4 ++--
>   4 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
> index 0556969f6dc1..a4a56ab0ba74 100644
> --- a/drivers/scsi/qla2xxx/qla_inline.h
> +++ b/drivers/scsi/qla2xxx/qla_inline.h
> @@ -577,7 +577,7 @@ fcport_is_bigger(fc_port_t *fcport)
>   static inline struct qla_qpair *
>   qla_mapq_nvme_select_qpair(struct qla_hw_data *ha, struct qla_qpair *qpair)
>   {
> -	int cpuid = smp_processor_id();
> +	int cpuid = raw_smp_processor_id();
>   
>   	if (qpair->cpuid != cpuid &&
>   	    ha->qp_cpu_map[cpuid]) {
> diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
> index e98788191897..01fc300d640f 100644
> --- a/drivers/scsi/qla2xxx/qla_isr.c
> +++ b/drivers/scsi/qla2xxx/qla_isr.c
> @@ -3965,7 +3965,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
>   	if (!ha->flags.fw_started)
>   		return;
>   
> -	if (rsp->qpair->cpuid != smp_processor_id() || !rsp->qpair->rcv_intr) {
> +	if (rsp->qpair->cpuid != raw_smp_processor_id() || !rsp->qpair->rcv_intr) {
>   		rsp->qpair->rcv_intr = 1;
>   
>   		if (!rsp->qpair->cpu_mapped)
> @@ -4468,7 +4468,7 @@ qla2xxx_msix_rsp_q(int irq, void *dev_id)
>   	}
>   	ha = qpair->hw;
>   
> -	queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work);
> +	queue_work_on(raw_smp_processor_id(), ha->wq, &qpair->q_work);
>   
>   	return IRQ_HANDLED;
>   }
> @@ -4494,7 +4494,7 @@ qla2xxx_msix_rsp_q_hs(int irq, void *dev_id)
>   	wrt_reg_dword(&reg->hccr, HCCRX_CLR_RISC_INT);
>   	spin_unlock_irqrestore(&ha->hardware_lock, flags);
>   
> -	queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work);
> +	queue_work_on(raw_smp_processor_id(), ha->wq, &qpair->q_work);
>   
>   	return IRQ_HANDLED;
>   }
> diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
> index 2b815a9928ea..9278713c3021 100644
> --- a/drivers/scsi/qla2xxx/qla_target.c
> +++ b/drivers/scsi/qla2xxx/qla_target.c
> @@ -4425,7 +4425,7 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha,
>   		queue_work_on(cmd->se_cmd.cpuid, qla_tgt_wq, &cmd->work);
>   	} else if (ha->msix_count) {
>   		if (cmd->atio.u.isp24.fcp_cmnd.rddata)
> -			queue_work_on(smp_processor_id(), qla_tgt_wq,
> +			queue_work_on(raw_smp_processor_id(), qla_tgt_wq,
>   			    &cmd->work);
>   		else
>   			queue_work_on(cmd->se_cmd.cpuid, qla_tgt_wq,
> diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
> index 3b5ba4b47b3b..9566f0384353 100644
> --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
> +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
> @@ -310,7 +310,7 @@ static void tcm_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd)
>   	cmd->trc_flags |= TRC_CMD_DONE;
>   
>   	INIT_WORK(&cmd->work, tcm_qla2xxx_complete_free);
> -	queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
> +	queue_work_on(raw_smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
>   }
>   
>   /*
> @@ -547,7 +547,7 @@ static void tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd)
>   	cmd->trc_flags |= TRC_DATA_IN;
>   	cmd->cmd_in_wq = 1;
>   	INIT_WORK(&cmd->work, tcm_qla2xxx_handle_data_work);
> -	queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
> +	queue_work_on(raw_smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
>   }
>   
>   static int tcm_qla2xxx_chk_dif_tags(uint32_t tag)
Nilesh Javali Aug. 9, 2023, 6:03 a.m. UTC | #2
Hi John,

> -----Original Message-----
> From: John Garry <john.g.garry@oracle.com>
> Sent: Monday, August 7, 2023 6:36 PM
> To: Nilesh Javali <njavali@marvell.com>; martin.petersen@oracle.com
> Cc: linux-scsi@vger.kernel.org; GR-QLogic-Storage-Upstream <GR-QLogic-
> Storage-Upstream@marvell.com>; Anil Gurumurthy
> <agurumurthy@marvell.com>; Shreyas Deodhar <sdeodhar@marvell.com>
> Subject: [EXT] Re: [PATCH v2 07/10] qla2xxx: Observed call trace in
> smp_processor_id()
> 
> On 07/08/2023 13:09, Nilesh Javali wrote:
> > From: Bikash Hazarika <bhazarika@marvell.com>
> >
> > Following Call Trace was observed,
> >
> > localhost kernel: nvme nvme0: NVME-FC{0}: controller connect complete
> > localhost kernel: BUG: using smp_processor_id() in preemptible [00000000]
> code: kworker/u129:4/75092
> > localhost kernel: nvme nvme0: NVME-FC{0}: new ctrl: NQN "nqn.1992-
> 08.com.netapp:sn.b42d198afb4d11ecad6d00a098d6abfa:subsystem.PR_Channe
> l2022_RH84_subsystem_291"
> > localhost kernel: caller is qla_nvme_post_cmd+0x216/0x1380 [qla2xxx]
> > localhost kernel: CPU: 6 PID: 75092 Comm: kworker/u129:4 Kdump: loaded
> Tainted: G    B   W  OE    --------- ---  5.14.0-70.22.1.el9_0.x86_64+debug #1
> > localhost kernel: Hardware name: HPE ProLiant XL420 Gen10/ProLiant XL420
> Gen10, BIOS U39 01/13/2022
> > localhost kernel: Workqueue: nvme-wq nvme_async_event_work [nvme_core]
> > localhost kernel: Call Trace:
> > localhost kernel: dump_stack_lvl+0x57/0x7d
> > localhost kernel: check_preemption_disabled+0xc8/0xd0
> > localhost kernel: qla_nvme_post_cmd+0x216/0x1380 [qla2xxx]
> >
> > Use raw_smp_processor_id api instead of smp_processor_id
> 
> This patch changes 7x instances of when smp_processor_id() is used
> without reasonable justification for all of them.
> 
> Furthermore, for the instance where the calltrace is reported, above,
> there is no mention of why it is indeed safe to use
> raw_smp_processor_id() and why the warning from smp_processor_id() can
> be ignored.
> 

Thanks for the review.

This patch aims to silence the warning message when CONFIG_DEBUG_PREEMPT is turned on by
a user reporting an issue.
For the qla2xxx driver it is not critical to have the exact CPU ID; at worst, the thread
that was preempted takes a momentary performance hit.

Thanks,
Nilesh
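
(A hedged illustration of that claim, annotating the qla_inline.h hunk from
this patch; the comments are editorial assumptions about the worst case, not
text from the original submission:)

static inline struct qla_qpair *
qla_mapq_nvme_select_qpair(struct qla_hw_data *ha, struct qla_qpair *qpair)
{
        /* Snapshot of the current CPU; with raw_smp_processor_id() the
         * caller may be preempted and migrated right after this read. */
        int cpuid = raw_smp_processor_id();

        /* If the snapshot is stale, the assumed worst case is that the
         * command lands on a qpair mapped to a CPU the thread just left,
         * i.e. a locality miss for that one command rather than a
         * correctness issue. */
        if (qpair->cpuid != cpuid &&
            ha->qp_cpu_map[cpuid]) {
                /* ... remainder of the function not shown in this hunk ... */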
John Garry Aug. 9, 2023, 3:31 p.m. UTC | #3
On 09/08/2023 07:03, Nilesh Javali wrote:
>> Furthermore, for the instance where the calltrace is reported, above,
>> there is no mention of why it is indeed safe to use
>> raw_smp_processor_id() and why the warning from smp_processor_id() can
>> be ignored.
>>
> Thanks for the review.
> 
> This patch aims to silent the warning message in case CONFIG_DEBUG_PREEMPT is turned on by
> any user reporting an issue.
> For qla2xxx driver it is not critical to have accurate CPU ID which would momentarily cause
> performance hit for the thread that pre-empted.

I doubt that the driver needs to check the current CPU so often, if at
all. But I don't know the driver well, so I can't say more.

Apart from that, according to the description of queue_work():
"We queue the work to the CPU on which it was submitted, but if the CPU 
dies it can be processed by another CPU."

So isn't something like
queue_work_on(raw_smp_processor_id(), ha->wq, &qpair->q_work) the same as
queue_work(ha->wq, &qpair->q_work)?

I see that queue_work() already has the raw_smp_processor_id() call.

Thanks,
John
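
(A minimal sketch of the equivalence described here, paraphrased from
include/linux/workqueue.h and kernel/workqueue.c; simplified, not verbatim:)

/* queue_work() is a thin wrapper that requests no particular CPU. */
static inline bool queue_work(struct workqueue_struct *wq,
                              struct work_struct *work)
{
        return queue_work_on(WORK_CPU_UNBOUND, wq, work);
}

/* Inside __queue_work(), WORK_CPU_UNBOUND is resolved to the submitting
 * CPU via raw_smp_processor_id() for per-CPU workqueues. So, assuming
 * ha->wq is a regular per-CPU workqueue,
 *
 *      queue_work_on(raw_smp_processor_id(), ha->wq, &qpair->q_work);
 *
 * queues on the same CPU as
 *
 *      queue_work(ha->wq, &qpair->q_work);
 *
 * with the difference that queue_work() also copes with that CPU going
 * offline, per the queue_work() description quoted above. */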
Nilesh Javali Aug. 10, 2023, 11:22 a.m. UTC | #4
> -----Original Message-----
> From: John Garry <john.g.garry@oracle.com>
> Sent: Wednesday, August 9, 2023 9:02 PM
> To: Nilesh Javali <njavali@marvell.com>; martin.petersen@oracle.com
> Cc: linux-scsi@vger.kernel.org; GR-QLogic-Storage-Upstream <GR-QLogic-
> Storage-Upstream@marvell.com>; Anil Gurumurthy
> <agurumurthy@marvell.com>; Shreyas Deodhar <sdeodhar@marvell.com>
> Subject: Re: [EXT] Re: [PATCH v2 07/10] qla2xxx: Observed call trace in
> smp_processor_id()
> 
> On 09/08/2023 07:03, Nilesh Javali wrote:
> >> Furthermore, for the instance where the calltrace is reported, above,
> >> there is no mention of why it is indeed safe to use
> >> raw_smp_processor_id() and why the warning from smp_processor_id() can
> >> be ignored.
> >>
> > Thanks for the review.
> >
> > This patch aims to silent the warning message in case
> CONFIG_DEBUG_PREEMPT is turned on by
> > any user reporting an issue.
> > For qla2xxx driver it is not critical to have accurate CPU ID which would
> momentarily cause
> > performance hit for the thread that pre-empted.
> 
> I doubt that the driver needs to check the current CPU so often, if at
> all. But I don't know the driver well, so can't say more.
> 
> Apart from that, according to description of queue_work():
> "We queue the work to the CPU on which it was submitted, but if the CPU
> dies it can be processed by another CPU."
> 
> So isn't something like
> queue_work_on(raw_smp_processor_id(), ha->wq, &qpair->q_work) same as
> queue_work(ha->wq, &qpair->q_work)?
> 
> I see that queue_work() already has the raw_smp_processor_id() call.

In fact, queue_work() uses queue_work_on(), which has the raw_smp_processor_id() call.
So qla2xxx using queue_work_on() with raw_smp_processor_id() should be okay,
unless you think otherwise.

Thanks,
Nilesh
John Garry Aug. 11, 2023, 11:11 a.m. UTC | #5
On 10/08/2023 12:22, Nilesh Javali wrote:
>> So isn't something like
>> queue_work_on(raw_smp_processor_id(), ha->wq, &qpair->q_work) same as
>> queue_work(ha->wq, &qpair->q_work)?
>>
>> I see that queue_work() already has the raw_smp_processor_id() call.
> In fact queue_work uses queue_work_on that has raw_smp_processor_id.
> So, qla2x using queue_work_on with raw_smp_processor_id should be okay,

Using queue_work(), rather than queue_work_on() with 
raw_smp_processor_id(), would be simpler. Simpler is generally better. 
And I see no advantage in using queue_work_on() with 
raw_smp_processor_id() (over queue_work()).

Thanks,
John
Nilesh Javali Aug. 21, 2023, 10:43 a.m. UTC | #6
John,

> -----Original Message-----
> From: John Garry <john.g.garry@oracle.com>
> Sent: Friday, August 11, 2023 4:42 PM
> To: Nilesh Javali <njavali@marvell.com>; martin.petersen@oracle.com
> Cc: linux-scsi@vger.kernel.org; GR-QLogic-Storage-Upstream <GR-QLogic-
> Storage-Upstream@marvell.com>; Anil Gurumurthy
> <agurumurthy@marvell.com>; Shreyas Deodhar <sdeodhar@marvell.com>
> Subject: Re: [EXT] Re: [PATCH v2 07/10] qla2xxx: Observed call trace in
> smp_processor_id()
> 
> On 10/08/2023 12:22, Nilesh Javali wrote:
> >> So isn't something like
> >> queue_work_on(raw_smp_processor_id(), ha->wq, &qpair->q_work)
> same as
> >> queue_work(ha->wq, &qpair->q_work)?
> >>
> >> I see that queue_work() already has the raw_smp_processor_id() call.
> > In fact queue_work uses queue_work_on that has
> raw_smp_processor_id.
> > So, qla2x using queue_work_on with raw_smp_processor_id should be
> okay,
> 
> Using queue_work(), rather than queue_work_on() with
> raw_smp_processor_id(), would be simpler. Simpler is generally better.
> And I see no advantage in using queue_work_on() with
> raw_smp_processor_id() (over queue_work()).
> 
> Thanks,
> John

I will send the change to use queue_work() as a separate patch,
and send v3 of the series with this patch dropped.

Thanks for the review once again.

Thanks,
Nilesh
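
(For illustration only: assuming the separate patch converts the same call
sites on top of the code as it stands with this patch dropped, the conversion
John suggests would look roughly like the following, shown on the qla_isr.c
call site; this is not an actual posted patch.)

-	queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work);
+	queue_work(ha->wq, &qpair->q_work);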

Patch

diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
index 0556969f6dc1..a4a56ab0ba74 100644
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -577,7 +577,7 @@  fcport_is_bigger(fc_port_t *fcport)
 static inline struct qla_qpair *
 qla_mapq_nvme_select_qpair(struct qla_hw_data *ha, struct qla_qpair *qpair)
 {
-	int cpuid = smp_processor_id();
+	int cpuid = raw_smp_processor_id();
 
 	if (qpair->cpuid != cpuid &&
 	    ha->qp_cpu_map[cpuid]) {
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index e98788191897..01fc300d640f 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -3965,7 +3965,7 @@  void qla24xx_process_response_queue(struct scsi_qla_host *vha,
 	if (!ha->flags.fw_started)
 		return;
 
-	if (rsp->qpair->cpuid != smp_processor_id() || !rsp->qpair->rcv_intr) {
+	if (rsp->qpair->cpuid != raw_smp_processor_id() || !rsp->qpair->rcv_intr) {
 		rsp->qpair->rcv_intr = 1;
 
 		if (!rsp->qpair->cpu_mapped)
@@ -4468,7 +4468,7 @@  qla2xxx_msix_rsp_q(int irq, void *dev_id)
 	}
 	ha = qpair->hw;
 
-	queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work);
+	queue_work_on(raw_smp_processor_id(), ha->wq, &qpair->q_work);
 
 	return IRQ_HANDLED;
 }
@@ -4494,7 +4494,7 @@  qla2xxx_msix_rsp_q_hs(int irq, void *dev_id)
 	wrt_reg_dword(&reg->hccr, HCCRX_CLR_RISC_INT);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
-	queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work);
+	queue_work_on(raw_smp_processor_id(), ha->wq, &qpair->q_work);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 2b815a9928ea..9278713c3021 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -4425,7 +4425,7 @@  static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha,
 		queue_work_on(cmd->se_cmd.cpuid, qla_tgt_wq, &cmd->work);
 	} else if (ha->msix_count) {
 		if (cmd->atio.u.isp24.fcp_cmnd.rddata)
-			queue_work_on(smp_processor_id(), qla_tgt_wq,
+			queue_work_on(raw_smp_processor_id(), qla_tgt_wq,
 			    &cmd->work);
 		else
 			queue_work_on(cmd->se_cmd.cpuid, qla_tgt_wq,
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 3b5ba4b47b3b..9566f0384353 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -310,7 +310,7 @@  static void tcm_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd)
 	cmd->trc_flags |= TRC_CMD_DONE;
 
 	INIT_WORK(&cmd->work, tcm_qla2xxx_complete_free);
-	queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
+	queue_work_on(raw_smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
 }
 
 /*
@@ -547,7 +547,7 @@  static void tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd)
 	cmd->trc_flags |= TRC_DATA_IN;
 	cmd->cmd_in_wq = 1;
 	INIT_WORK(&cmd->work, tcm_qla2xxx_handle_data_work);
-	queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
+	queue_work_on(raw_smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
 }
 
 static int tcm_qla2xxx_chk_dif_tags(uint32_t tag)