diff mbox series

[v2] scsi: be2iscsi: Replace irq_poll with threaded IRQ handler.

Message ID 20211029074902.4fayed6mcltifgdz@linutronix.de (mailing list archive)
State Deferred
Headers show
Series [v2] scsi: be2iscsi: Replace irq_poll with threaded IRQ handler. | expand

Commit Message

Sebastian Andrzej Siewior Oct. 29, 2021, 7:49 a.m. UTC
The driver is using irq_poll() (a NAPI like generic interface) for
completing individual I/O requests.
This could be replaced with threaded interrupts. The threaded interrupt
runs as a RT thread with priority 50-FIFO. It should run as the first
task after the interrupt unless a task with higher priority is active.
It can be interrupt by another hardware interrupt or a softirq.

This has been compile tested only. I'm interested if it causes any
regressions, improves the situation or neither.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---

v1…v2:
  - Properly initialize variables in be_isr_misx_th() as reported by the
    bot.

  - Drop jitendra.bhivare@broadcom.com and
    subbu.seetharaman@broadcom.com from Cc:, the email bounces.

 drivers/scsi/be2iscsi/Kconfig    |   1 -
 drivers/scsi/be2iscsi/be.h       |   3 +-
 drivers/scsi/be2iscsi/be_iscsi.c |   8 +-
 drivers/scsi/be2iscsi/be_main.c  | 146 ++++++++++++++-----------------
 drivers/scsi/be2iscsi/be_main.h  |   2 +-
 5 files changed, 75 insertions(+), 85 deletions(-)

Comments

John Garry Oct. 29, 2021, 1:41 p.m. UTC | #1
> +static irqreturn_t be_iopoll(struct be_eq_obj *pbe_eq)
> +{
> +	struct beiscsi_hba *phba;
> +	unsigned int ret, io_events;
> +	struct be_eq_entry *eqe = NULL;
> +	struct be_queue_info *eq;
> +
> +	phba = pbe_eq->phba;
> +	if (beiscsi_hba_in_error(phba))
> +		return IRQ_NONE;
> +
> +	io_events = 0;
> +	eq = &pbe_eq->q;
> +	eqe = queue_tail_node(eq);
> +	while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] &
> +			EQE_VALID_MASK) {
> +		AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
> +		queue_tail_inc(eq);
> +		eqe = queue_tail_node(eq);
> +		io_events++;
> +	}
> +	hwi_ring_eq_db(phba, eq->id, 1, io_events, 0, 1);
> +
> +	ret = beiscsi_process_cq(pbe_eq);
> +	pbe_eq->cq_count += ret;
> +	beiscsi_log(phba, KERN_INFO,
> +		    BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO,
> +		    "BM_%d : rearm pbe_eq->q.id =%d ret %d\n",
> +		    pbe_eq->q.id, ret);
> +	if (!beiscsi_hba_in_error(phba))
> +		hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t be_isr_misx_th(int irq, void *dev_id)
> +{
> +	struct be_eq_obj *pbe_eq  = dev_id;
> +
> +	return be_iopoll(pbe_eq);
> +}
> +
>   /**
>    * be_isr_msix - The isr routine of the driver.
>    * @irq: Not used
> @@ -713,9 +752,22 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id)
>   	phba = pbe_eq->phba;
>   	/* disable interrupt till iopoll completes */
>   	hwi_ring_eq_db(phba, eq->id, 1,	0, 0, 1);
> -	irq_poll_sched(&pbe_eq->iopoll);
>   
> -	return IRQ_HANDLED;
> +	return IRQ_WAKE_THREAD;
> +}
> +
> +static irqreturn_t be_isr_thread(int irq, void *dev_id)
> +{
> +	struct beiscsi_hba *phba;
> +	struct hwi_controller *phwi_ctrlr;
> +	struct hwi_context_memory *phwi_context;
> +	struct be_eq_obj *pbe_eq;
> +
> +	phba = dev_id;
> +	phwi_ctrlr = phba->phwi_ctrlr;
> +	phwi_context = phwi_ctrlr->phwi_ctxt;
> +	pbe_eq = &phwi_context->be_eq[0];
> +	return be_iopoll(pbe_eq);
>   }
>   
>   /**
> @@ -735,6 +787,7 @@ static irqreturn_t be_isr(int irq, void *dev_id)
>   	struct be_ctrl_info *ctrl;
>   	struct be_eq_obj *pbe_eq;
>   	int isr, rearm;
> +	irqreturn_t ret;
>   
>   	phba = dev_id;
>   	ctrl = &phba->ctrl;
> @@ -774,10 +827,11 @@ static irqreturn_t be_isr(int irq, void *dev_id)
>   		/* rearm for MCCQ */
>   		rearm = 1;
>   	}
> +	ret = IRQ_HANDLED;
>   	if (io_events)
> -		irq_poll_sched(&pbe_eq->iopoll);
> +		ret = IRQ_WAKE_THREAD;
>   	hwi_ring_eq_db(phba, eq->id, 0, (io_events + mcc_events), rearm, 1);
> -	return IRQ_HANDLED;
> +	return ret;
>   }
>   
>   static void beiscsi_free_irqs(struct beiscsi_hba *phba)
> @@ -819,9 +873,10 @@ static int beiscsi_init_irqs(struct beiscsi_hba *phba)
>   				goto free_msix_irqs;
>   			}
>   
> -			ret = request_irq(pci_irq_vector(pcidev, i),
> -					  be_isr_msix, 0, phba->msi_name[i],
> -					  &phwi_context->be_eq[i]);
> +			ret = request_threaded_irq(pci_irq_vector(pcidev, i),
> +						   be_isr_msix, be_isr_misx_th,
> +						   0, phba->msi_name[i],
> +						   &phwi_context->be_eq[i]);

Would it be sensible to set ONESHOT flag here? I assume that we don't 
want the hard irq handler firing continuously while we poll completions 
in the threaded handler. That's my understanding of how ONESHOT should 
or would work... they currently seem to manually disable in 
be_isr_msix() -> hwi_ring_eq_db() for the same purpose, I guess.

Thanks,
John
Sebastian Andrzej Siewior Oct. 29, 2021, 1:46 p.m. UTC | #2
On 2021-10-29 14:41:53 [+0100], John Garry wrote:
> > @@ -713,9 +752,22 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id)
> >   	phba = pbe_eq->phba;
> >   	/* disable interrupt till iopoll completes */
> >   	hwi_ring_eq_db(phba, eq->id, 1,	0, 0, 1);
> > -	irq_poll_sched(&pbe_eq->iopoll);
> > -	return IRQ_HANDLED;
> > +	return IRQ_WAKE_THREAD;
> > +}
> > +
> > @@ -819,9 +873,10 @@ static int beiscsi_init_irqs(struct beiscsi_hba *phba)
> >   				goto free_msix_irqs;
> >   			}
> > -			ret = request_irq(pci_irq_vector(pcidev, i),
> > -					  be_isr_msix, 0, phba->msi_name[i],
> > -					  &phwi_context->be_eq[i]);
> > +			ret = request_threaded_irq(pci_irq_vector(pcidev, i),
> > +						   be_isr_msix, be_isr_misx_th,
> > +						   0, phba->msi_name[i],
> > +						   &phwi_context->be_eq[i]);
> 
> Would it be sensible to set ONESHOT flag here? I assume that we don't want
> the hard irq handler firing continuously while we poll completions in the
> threaded handler. That's my understanding of how ONESHOT should or would
> work... they currently seem to manually disable in be_isr_msix() ->
> hwi_ring_eq_db() for the same purpose, I guess.

We could. My understanding of hwi_ring_eq_db() of is that it disables
the interrupt from on HW level. This is needed _already_ in order to
continue to process the request later in softirq without the interrupt
constantly firing.
For the MSI interrupts this could be replaced "easily" and leaving the
primary handler empty. However the non-MSI interrupts may be shared and
here all handlers need the ONESHOT which may not be the case.

> Thanks,
> John

Sebastian
diff mbox series

Patch

diff --git a/drivers/scsi/be2iscsi/Kconfig b/drivers/scsi/be2iscsi/Kconfig
index 958c9b46ec787..0118b2a765caf 100644
--- a/drivers/scsi/be2iscsi/Kconfig
+++ b/drivers/scsi/be2iscsi/Kconfig
@@ -4,7 +4,6 @@  config BE2ISCSI
 	depends on PCI && SCSI && NET
 	select SCSI_ISCSI_ATTRS
 	select ISCSI_BOOT_SYSFS
-	select IRQ_POLL
 
 	help
 	This driver implements the iSCSI functionality for Emulex
diff --git a/drivers/scsi/be2iscsi/be.h b/drivers/scsi/be2iscsi/be.h
index 4c58a02590c7b..8623886810310 100644
--- a/drivers/scsi/be2iscsi/be.h
+++ b/drivers/scsi/be2iscsi/be.h
@@ -12,7 +12,7 @@ 
 
 #include <linux/pci.h>
 #include <linux/if_vlan.h>
-#include <linux/irq_poll.h>
+
 #define FW_VER_LEN	32
 #define MCC_Q_LEN	128
 #define MCC_CQ_LEN	256
@@ -90,7 +90,6 @@  struct be_eq_obj {
 	struct beiscsi_hba *phba;
 	struct be_queue_info *cq;
 	struct work_struct mcc_work; /* Work Item */
-	struct irq_poll	iopoll;
 };
 
 struct be_mcc_obj {
diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c
index 8aeaddc93b167..e15d5c7ef611e 100644
--- a/drivers/scsi/be2iscsi/be_iscsi.c
+++ b/drivers/scsi/be2iscsi/be_iscsi.c
@@ -1224,15 +1224,17 @@  static void beiscsi_flush_cq(struct beiscsi_hba *phba)
 	struct be_eq_obj *pbe_eq;
 	struct hwi_controller *phwi_ctrlr;
 	struct hwi_context_memory *phwi_context;
+	struct pci_dev *pcidev;
 
 	phwi_ctrlr = phba->phwi_ctrlr;
 	phwi_context = phwi_ctrlr->phwi_ctxt;
+	pcidev = phba->pcidev;
 
 	for (i = 0; i < phba->num_cpus; i++) {
 		pbe_eq = &phwi_context->be_eq[i];
-		irq_poll_disable(&pbe_eq->iopoll);
-		beiscsi_process_cq(pbe_eq, BE2_MAX_NUM_CQ_PROC);
-		irq_poll_enable(&pbe_eq->iopoll);
+		disable_irq(pci_irq_vector(pcidev, i));
+		beiscsi_process_cq(pbe_eq);
+		enable_irq(pci_irq_vector(pcidev, i));
 	}
 }
 
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index e70f69f791db6..ef09e298cca94 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -35,7 +35,6 @@ 
 #include <linux/iscsi_boot_sysfs.h>
 #include <linux/module.h>
 #include <linux/bsg-lib.h>
-#include <linux/irq_poll.h>
 
 #include <scsi/libiscsi.h>
 #include <scsi/scsi_bsg_iscsi.h>
@@ -51,7 +50,6 @@ 
 #include "be_mgmt.h"
 #include "be_cmds.h"
 
-static unsigned int be_iopoll_budget = 10;
 static unsigned int be_max_phys_size = 64;
 static unsigned int enable_msix = 1;
 
@@ -59,7 +57,6 @@  MODULE_DESCRIPTION(DRV_DESC " " BUILD_STR);
 MODULE_VERSION(BUILD_STR);
 MODULE_AUTHOR("Emulex Corporation");
 MODULE_LICENSE("GPL");
-module_param(be_iopoll_budget, int, 0);
 module_param(enable_msix, int, 0);
 module_param(be_max_phys_size, uint, S_IRUGO);
 MODULE_PARM_DESC(be_max_phys_size,
@@ -696,6 +693,48 @@  static irqreturn_t be_isr_mcc(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t be_iopoll(struct be_eq_obj *pbe_eq)
+{
+	struct beiscsi_hba *phba;
+	unsigned int ret, io_events;
+	struct be_eq_entry *eqe = NULL;
+	struct be_queue_info *eq;
+
+	phba = pbe_eq->phba;
+	if (beiscsi_hba_in_error(phba))
+		return IRQ_NONE;
+
+	io_events = 0;
+	eq = &pbe_eq->q;
+	eqe = queue_tail_node(eq);
+	while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] &
+			EQE_VALID_MASK) {
+		AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
+		queue_tail_inc(eq);
+		eqe = queue_tail_node(eq);
+		io_events++;
+	}
+	hwi_ring_eq_db(phba, eq->id, 1, io_events, 0, 1);
+
+	ret = beiscsi_process_cq(pbe_eq);
+	pbe_eq->cq_count += ret;
+	beiscsi_log(phba, KERN_INFO,
+		    BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO,
+		    "BM_%d : rearm pbe_eq->q.id =%d ret %d\n",
+		    pbe_eq->q.id, ret);
+	if (!beiscsi_hba_in_error(phba))
+		hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t be_isr_misx_th(int irq, void *dev_id)
+{
+	struct be_eq_obj *pbe_eq  = dev_id;
+
+	return be_iopoll(pbe_eq);
+}
+
 /**
  * be_isr_msix - The isr routine of the driver.
  * @irq: Not used
@@ -713,9 +752,22 @@  static irqreturn_t be_isr_msix(int irq, void *dev_id)
 	phba = pbe_eq->phba;
 	/* disable interrupt till iopoll completes */
 	hwi_ring_eq_db(phba, eq->id, 1,	0, 0, 1);
-	irq_poll_sched(&pbe_eq->iopoll);
 
-	return IRQ_HANDLED;
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t be_isr_thread(int irq, void *dev_id)
+{
+	struct beiscsi_hba *phba;
+	struct hwi_controller *phwi_ctrlr;
+	struct hwi_context_memory *phwi_context;
+	struct be_eq_obj *pbe_eq;
+
+	phba = dev_id;
+	phwi_ctrlr = phba->phwi_ctrlr;
+	phwi_context = phwi_ctrlr->phwi_ctxt;
+	pbe_eq = &phwi_context->be_eq[0];
+	return be_iopoll(pbe_eq);
 }
 
 /**
@@ -735,6 +787,7 @@  static irqreturn_t be_isr(int irq, void *dev_id)
 	struct be_ctrl_info *ctrl;
 	struct be_eq_obj *pbe_eq;
 	int isr, rearm;
+	irqreturn_t ret;
 
 	phba = dev_id;
 	ctrl = &phba->ctrl;
@@ -774,10 +827,11 @@  static irqreturn_t be_isr(int irq, void *dev_id)
 		/* rearm for MCCQ */
 		rearm = 1;
 	}
+	ret = IRQ_HANDLED;
 	if (io_events)
-		irq_poll_sched(&pbe_eq->iopoll);
+		ret = IRQ_WAKE_THREAD;
 	hwi_ring_eq_db(phba, eq->id, 0, (io_events + mcc_events), rearm, 1);
-	return IRQ_HANDLED;
+	return ret;
 }
 
 static void beiscsi_free_irqs(struct beiscsi_hba *phba)
@@ -819,9 +873,10 @@  static int beiscsi_init_irqs(struct beiscsi_hba *phba)
 				goto free_msix_irqs;
 			}
 
-			ret = request_irq(pci_irq_vector(pcidev, i),
-					  be_isr_msix, 0, phba->msi_name[i],
-					  &phwi_context->be_eq[i]);
+			ret = request_threaded_irq(pci_irq_vector(pcidev, i),
+						   be_isr_msix, be_isr_misx_th,
+						   0, phba->msi_name[i],
+						   &phwi_context->be_eq[i]);
 			if (ret) {
 				beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT,
 					    "BM_%d : %s-Failed to register msix for i = %d\n",
@@ -847,8 +902,8 @@  static int beiscsi_init_irqs(struct beiscsi_hba *phba)
 		}
 
 	} else {
-		ret = request_irq(pcidev->irq, be_isr, IRQF_SHARED,
-				  "beiscsi", phba);
+		ret = request_threaded_irq(pcidev->irq, be_isr, be_isr_thread,
+					   IRQF_SHARED, "beiscsi", phba);
 		if (ret) {
 			beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT,
 				    "BM_%d : %s-Failed to register irq\n",
@@ -1839,12 +1894,11 @@  static void beiscsi_mcc_work(struct work_struct *work)
 /**
  * beiscsi_process_cq()- Process the Completion Queue
  * @pbe_eq: Event Q on which the Completion has come
- * @budget: Max number of events to processed
  *
  * return
  *     Number of Completion Entries processed.
  **/
-unsigned int beiscsi_process_cq(struct be_eq_obj *pbe_eq, int budget)
+unsigned int beiscsi_process_cq(struct be_eq_obj *pbe_eq)
 {
 	struct be_queue_info *cq;
 	struct sol_cqe *sol;
@@ -2018,55 +2072,12 @@  unsigned int beiscsi_process_cq(struct be_eq_obj *pbe_eq, int budget)
 		queue_tail_inc(cq);
 		sol = queue_tail_node(cq);
 		num_processed++;
-		if (total == budget)
-			break;
 	}
 
 	hwi_ring_cq_db(phba, cq->id, num_processed, 1);
 	return total;
 }
 
-static int be_iopoll(struct irq_poll *iop, int budget)
-{
-	unsigned int ret, io_events;
-	struct beiscsi_hba *phba;
-	struct be_eq_obj *pbe_eq;
-	struct be_eq_entry *eqe = NULL;
-	struct be_queue_info *eq;
-
-	pbe_eq = container_of(iop, struct be_eq_obj, iopoll);
-	phba = pbe_eq->phba;
-	if (beiscsi_hba_in_error(phba)) {
-		irq_poll_complete(iop);
-		return 0;
-	}
-
-	io_events = 0;
-	eq = &pbe_eq->q;
-	eqe = queue_tail_node(eq);
-	while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] &
-			EQE_VALID_MASK) {
-		AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
-		queue_tail_inc(eq);
-		eqe = queue_tail_node(eq);
-		io_events++;
-	}
-	hwi_ring_eq_db(phba, eq->id, 1, io_events, 0, 1);
-
-	ret = beiscsi_process_cq(pbe_eq, budget);
-	pbe_eq->cq_count += ret;
-	if (ret < budget) {
-		irq_poll_complete(iop);
-		beiscsi_log(phba, KERN_INFO,
-			    BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO,
-			    "BM_%d : rearm pbe_eq->q.id =%d ret %d\n",
-			    pbe_eq->q.id, ret);
-		if (!beiscsi_hba_in_error(phba))
-			hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1);
-	}
-	return ret;
-}
-
 static void
 hwi_write_sgl_v2(struct iscsi_wrb *pwrb, struct scatterlist *sg,
 		  unsigned int num_sg, struct beiscsi_io_task *io_task)
@@ -5308,10 +5319,6 @@  static int beiscsi_enable_port(struct beiscsi_hba *phba)
 
 	phwi_ctrlr = phba->phwi_ctrlr;
 	phwi_context = phwi_ctrlr->phwi_ctxt;
-	for (i = 0; i < phba->num_cpus; i++) {
-		pbe_eq = &phwi_context->be_eq[i];
-		irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget, be_iopoll);
-	}
 
 	i = (phba->pcidev->msix_enabled) ? i : 0;
 	/* Work item for MCC handling */
@@ -5344,10 +5351,6 @@  static int beiscsi_enable_port(struct beiscsi_hba *phba)
 	return 0;
 
 cleanup_port:
-	for (i = 0; i < phba->num_cpus; i++) {
-		pbe_eq = &phwi_context->be_eq[i];
-		irq_poll_disable(&pbe_eq->iopoll);
-	}
 	hwi_cleanup_port(phba);
 
 disable_msix:
@@ -5379,10 +5382,6 @@  static void beiscsi_disable_port(struct beiscsi_hba *phba, int unload)
 	beiscsi_free_irqs(phba);
 	pci_free_irq_vectors(phba->pcidev);
 
-	for (i = 0; i < phba->num_cpus; i++) {
-		pbe_eq = &phwi_context->be_eq[i];
-		irq_poll_disable(&pbe_eq->iopoll);
-	}
 	cancel_delayed_work_sync(&phba->eqd_update);
 	cancel_work_sync(&phba->boot_work);
 	/* WQ might be running cancel queued mcc_work if we are not exiting */
@@ -5637,11 +5636,6 @@  static int beiscsi_dev_probe(struct pci_dev *pcidev,
 	phwi_ctrlr = phba->phwi_ctrlr;
 	phwi_context = phwi_ctrlr->phwi_ctxt;
 
-	for (i = 0; i < phba->num_cpus; i++) {
-		pbe_eq = &phwi_context->be_eq[i];
-		irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget, be_iopoll);
-	}
-
 	i = (phba->pcidev->msix_enabled) ? i : 0;
 	/* Work item for MCC handling */
 	pbe_eq = &phwi_context->be_eq[i];
@@ -5698,10 +5692,6 @@  static int beiscsi_dev_probe(struct pci_dev *pcidev,
 	hwi_disable_intr(phba);
 	beiscsi_free_irqs(phba);
 disable_iopoll:
-	for (i = 0; i < phba->num_cpus; i++) {
-		pbe_eq = &phwi_context->be_eq[i];
-		irq_poll_disable(&pbe_eq->iopoll);
-	}
 	destroy_workqueue(phba->wq);
 free_twq:
 	hwi_cleanup_port(phba);
diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h
index 98977c0700f1a..5c0e54eaf9e99 100644
--- a/drivers/scsi/be2iscsi/be_main.h
+++ b/drivers/scsi/be2iscsi/be_main.h
@@ -799,7 +799,7 @@  void hwi_ring_cq_db(struct beiscsi_hba *phba,
 		     unsigned int id, unsigned int num_processed,
 		     unsigned char rearm);
 
-unsigned int beiscsi_process_cq(struct be_eq_obj *pbe_eq, int budget);
+unsigned int beiscsi_process_cq(struct be_eq_obj *pbe_eq);
 void beiscsi_process_mcc_cq(struct beiscsi_hba *phba);
 
 struct pdu_nop_out {