diff mbox series

[7/9] scsi: hisi_sas_v3: convert private reply queue to blk-mq hw queue

Message ID 20190531022801.10003-8-ming.lei@redhat.com (mailing list archive)
State New, archived
Headers show
Series blk-mq/scsi: convert private reply queue into blk_mq hw queue | expand

Commit Message

Ming Lei May 31, 2019, 2:27 a.m. UTC
SCSI's reply queue is very similar to blk-mq's hw queue, both
assigned by IRQ vector, so map the private reply queue into blk-mq's hw
queue via .host_tagset.

Then the private reply mapping can be removed.

Another benefit is that the lost request/IRQ issue may be solved in a
generic way, because a managed IRQ may be shut down during CPU
hotplug.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/scsi/hisi_sas/hisi_sas.h       |  2 +-
 drivers/scsi/hisi_sas/hisi_sas_main.c  | 36 ++++++++++----------
 drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 46 +++++++++-----------------
 3 files changed, 36 insertions(+), 48 deletions(-)

Comments

Hannes Reinecke May 31, 2019, 6:20 a.m. UTC | #1
On 5/31/19 4:27 AM, Ming Lei wrote:
> SCSI's reply qeueue is very similar with blk-mq's hw queue, both
> assigned by IRQ vector, so map te private reply queue into blk-mq's hw
> queue via .host_tagset.
> 
> Then the private reply mapping can be removed.
> 
> Another benefit is that the request/irq lost issue may be solved in
> generic approach because managed IRQ may be shutdown during CPU
> hotplug.
> 
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  drivers/scsi/hisi_sas/hisi_sas.h       |  2 +-
>  drivers/scsi/hisi_sas/hisi_sas_main.c  | 36 ++++++++++----------
>  drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 46 +++++++++-----------------
>  3 files changed, 36 insertions(+), 48 deletions(-)
> 
> diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
> index fc87994b5d73..3d48848dbde7 100644
> --- a/drivers/scsi/hisi_sas/hisi_sas.h
> +++ b/drivers/scsi/hisi_sas/hisi_sas.h
> @@ -26,6 +26,7 @@
>  #include <linux/platform_device.h>
>  #include <linux/property.h>
>  #include <linux/regmap.h>
> +#include <linux/blk-mq-pci.h>
>  #include <scsi/sas_ata.h>
>  #include <scsi/libsas.h>
>  
> @@ -378,7 +379,6 @@ struct hisi_hba {
>  	u32 intr_coal_count;	/* Interrupt count to coalesce */
>  
>  	int cq_nvecs;
> -	unsigned int *reply_map;
>  
>  	/* debugfs memories */
>  	u32 *debugfs_global_reg;
> diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
> index 8a7feb8ed8d6..a1c1f30b9fdb 100644
> --- a/drivers/scsi/hisi_sas/hisi_sas_main.c
> +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
> @@ -441,6 +441,19 @@ static int hisi_sas_dif_dma_map(struct hisi_hba *hisi_hba,
>  	return rc;
>  }
>  
> +static struct scsi_cmnd *sas_task_to_scsi_cmd(struct sas_task *task)
> +{
> +	if (!task->uldd_task)
> +		return NULL;
> +
> +	if (dev_is_sata(task->dev)) {
> +		struct ata_queued_cmd *qc = task->uldd_task;
> +		return qc->scsicmd;
> +	} else {
> +		return task->uldd_task;
> +	}
> +}
> +
>  static int hisi_sas_task_prep(struct sas_task *task,
>  			      struct hisi_sas_dq **dq_pointer,
>  			      bool is_tmf, struct hisi_sas_tmf_task *tmf,
> @@ -459,6 +472,7 @@ static int hisi_sas_task_prep(struct sas_task *task,
>  	struct hisi_sas_dq *dq;
>  	unsigned long flags;
>  	int wr_q_index;
> +	struct scsi_cmnd *scsi_cmnd;
>  
>  	if (DEV_IS_GONE(sas_dev)) {
>  		if (sas_dev)
> @@ -471,9 +485,10 @@ static int hisi_sas_task_prep(struct sas_task *task,
>  		return -ECOMM;
>  	}
>  
> -	if (hisi_hba->reply_map) {
> -		int cpu = raw_smp_processor_id();
> -		unsigned int dq_index = hisi_hba->reply_map[cpu];
> +	scsi_cmnd = sas_task_to_scsi_cmd(task);
> +	if (hisi_hba->shost->hostt->host_tagset) {
> +		unsigned int dq_index = scsi_cmnd_hctx_index(
> +				hisi_hba->shost, scsi_cmnd);
>  
>  		*dq_pointer = dq = &hisi_hba->dq[dq_index];
>  	} else {
> @@ -503,21 +518,8 @@ static int hisi_sas_task_prep(struct sas_task *task,
>  
>  	if (hisi_hba->hw->slot_index_alloc)
>  		rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device);
> -	else {
> -		struct scsi_cmnd *scsi_cmnd = NULL;
> -
> -		if (task->uldd_task) {
> -			struct ata_queued_cmd *qc;
> -
> -			if (dev_is_sata(device)) {
> -				qc = task->uldd_task;
> -				scsi_cmnd = qc->scsicmd;
> -			} else {
> -				scsi_cmnd = task->uldd_task;
> -			}
> -		}
> +	else
>  		rc  = hisi_sas_slot_index_alloc(hisi_hba, scsi_cmnd);
> -	}
>  	if (rc < 0)
>  		goto err_out_dif_dma_unmap;
>  
> diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
> index 49620c2411df..063e50e5b30c 100644
> --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
> +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
> @@ -2344,30 +2344,6 @@ static irqreturn_t cq_interrupt_v3_hw(int irq_no, void *p)
>  	return IRQ_HANDLED;
>  }
>  
> -static void setup_reply_map_v3_hw(struct hisi_hba *hisi_hba, int nvecs)
> -{
> -	const struct cpumask *mask;
> -	int queue, cpu;
> -
> -	for (queue = 0; queue < nvecs; queue++) {
> -		struct hisi_sas_cq *cq = &hisi_hba->cq[queue];
> -
> -		mask = pci_irq_get_affinity(hisi_hba->pci_dev, queue +
> -					    BASE_VECTORS_V3_HW);
> -		if (!mask)
> -			goto fallback;
> -		cq->pci_irq_mask = mask;
> -		for_each_cpu(cpu, mask)
> -			hisi_hba->reply_map[cpu] = queue;
> -	}
> -	return;
> -
> -fallback:
> -	for_each_possible_cpu(cpu)
> -		hisi_hba->reply_map[cpu] = cpu % hisi_hba->queue_count;
> -	/* Don't clean all CQ masks */
> -}
> -
>  static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
>  {
>  	struct device *dev = hisi_hba->dev;
> @@ -2383,11 +2359,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
>  
>  		min_msi = MIN_AFFINE_VECTORS_V3_HW;
>  
> -		hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids,
> -						   sizeof(unsigned int),
> -						   GFP_KERNEL);
> -		if (!hisi_hba->reply_map)
> -			return -ENOMEM;
>  		vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev,
>  							 min_msi, max_msi,
>  							 PCI_IRQ_MSI |
> @@ -2395,7 +2366,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
>  							 &desc);
>  		if (vectors < 0)
>  			return -ENOENT;
> -		setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW);
>  	} else {
>  		min_msi = max_msi;
>  		vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi,
> @@ -2896,6 +2866,18 @@ static void debugfs_snapshot_restore_v3_hw(struct hisi_hba *hisi_hba)
>  	clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
>  }
>  
> +static int hisi_sas_map_queues(struct Scsi_Host *shost)
> +{
> +	struct hisi_hba *hisi_hba = shost_priv(shost);
> +	struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
> +
> +	if (auto_affine_msi_experimental)
> +		return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev,
> +				BASE_VECTORS_V3_HW);
> +	else
> +		return blk_mq_map_queues(qmap);
> +}
> +
>  static struct scsi_host_template sht_v3_hw = {
>  	.name			= DRV_NAME,
>  	.module			= THIS_MODULE,

As mentioned, we should be using a common function here.

> @@ -2906,6 +2888,8 @@ static struct scsi_host_template sht_v3_hw = {
>  	.scan_start		= hisi_sas_scan_start,
>  	.change_queue_depth	= sas_change_queue_depth,
>  	.bios_param		= sas_bios_param,
> +	.map_queues		= hisi_sas_map_queues,
> +	.host_tagset		= 1,
>  	.this_id		= -1,
>  	.sg_tablesize		= HISI_SAS_SGE_PAGE_CNT,
>  	.sg_prot_tablesize	= HISI_SAS_SGE_PAGE_CNT,
> @@ -3092,6 +3076,8 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	if (hisi_sas_debugfs_enable)
>  		hisi_sas_debugfs_init(hisi_hba);
>  
> +	shost->nr_hw_queues = hisi_hba->cq_nvecs;
> +
>  	rc = scsi_add_host(shost, dev);
>  	if (rc)
>  		goto err_out_ha;
> 
Well, I'd rather see the v3 hardware converted to 'real' blk-mq first;
the hardware itself is pretty much multiqueue already, so we should be
better off converting it to blk-mq.

Cheers,

Hannes
Ming Lei May 31, 2019, 6:34 a.m. UTC | #2
On Fri, May 31, 2019 at 2:21 PM Hannes Reinecke <hare@suse.de> wrote:
>
> On 5/31/19 4:27 AM, Ming Lei wrote:
> > SCSI's reply qeueue is very similar with blk-mq's hw queue, both
> > assigned by IRQ vector, so map te private reply queue into blk-mq's hw
> > queue via .host_tagset.
> >
> > Then the private reply mapping can be removed.
> >
> > Another benefit is that the request/irq lost issue may be solved in
> > generic approach because managed IRQ may be shutdown during CPU
> > hotplug.
> >
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> >  drivers/scsi/hisi_sas/hisi_sas.h       |  2 +-
> >  drivers/scsi/hisi_sas/hisi_sas_main.c  | 36 ++++++++++----------
> >  drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 46 +++++++++-----------------
> >  3 files changed, 36 insertions(+), 48 deletions(-)
> >
> > diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
> > index fc87994b5d73..3d48848dbde7 100644
> > --- a/drivers/scsi/hisi_sas/hisi_sas.h
> > +++ b/drivers/scsi/hisi_sas/hisi_sas.h
> > @@ -26,6 +26,7 @@
> >  #include <linux/platform_device.h>
> >  #include <linux/property.h>
> >  #include <linux/regmap.h>
> > +#include <linux/blk-mq-pci.h>
> >  #include <scsi/sas_ata.h>
> >  #include <scsi/libsas.h>
> >
> > @@ -378,7 +379,6 @@ struct hisi_hba {
> >       u32 intr_coal_count;    /* Interrupt count to coalesce */
> >
> >       int cq_nvecs;
> > -     unsigned int *reply_map;
> >
> >       /* debugfs memories */
> >       u32 *debugfs_global_reg;
> > diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
> > index 8a7feb8ed8d6..a1c1f30b9fdb 100644
> > --- a/drivers/scsi/hisi_sas/hisi_sas_main.c
> > +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
> > @@ -441,6 +441,19 @@ static int hisi_sas_dif_dma_map(struct hisi_hba *hisi_hba,
> >       return rc;
> >  }
> >
> > +static struct scsi_cmnd *sas_task_to_scsi_cmd(struct sas_task *task)
> > +{
> > +     if (!task->uldd_task)
> > +             return NULL;
> > +
> > +     if (dev_is_sata(task->dev)) {
> > +             struct ata_queued_cmd *qc = task->uldd_task;
> > +             return qc->scsicmd;
> > +     } else {
> > +             return task->uldd_task;
> > +     }
> > +}
> > +
> >  static int hisi_sas_task_prep(struct sas_task *task,
> >                             struct hisi_sas_dq **dq_pointer,
> >                             bool is_tmf, struct hisi_sas_tmf_task *tmf,
> > @@ -459,6 +472,7 @@ static int hisi_sas_task_prep(struct sas_task *task,
> >       struct hisi_sas_dq *dq;
> >       unsigned long flags;
> >       int wr_q_index;
> > +     struct scsi_cmnd *scsi_cmnd;
> >
> >       if (DEV_IS_GONE(sas_dev)) {
> >               if (sas_dev)
> > @@ -471,9 +485,10 @@ static int hisi_sas_task_prep(struct sas_task *task,
> >               return -ECOMM;
> >       }
> >
> > -     if (hisi_hba->reply_map) {
> > -             int cpu = raw_smp_processor_id();
> > -             unsigned int dq_index = hisi_hba->reply_map[cpu];
> > +     scsi_cmnd = sas_task_to_scsi_cmd(task);
> > +     if (hisi_hba->shost->hostt->host_tagset) {
> > +             unsigned int dq_index = scsi_cmnd_hctx_index(
> > +                             hisi_hba->shost, scsi_cmnd);
> >
> >               *dq_pointer = dq = &hisi_hba->dq[dq_index];
> >       } else {
> > @@ -503,21 +518,8 @@ static int hisi_sas_task_prep(struct sas_task *task,
> >
> >       if (hisi_hba->hw->slot_index_alloc)
> >               rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device);
> > -     else {
> > -             struct scsi_cmnd *scsi_cmnd = NULL;
> > -
> > -             if (task->uldd_task) {
> > -                     struct ata_queued_cmd *qc;
> > -
> > -                     if (dev_is_sata(device)) {
> > -                             qc = task->uldd_task;
> > -                             scsi_cmnd = qc->scsicmd;
> > -                     } else {
> > -                             scsi_cmnd = task->uldd_task;
> > -                     }
> > -             }
> > +     else
> >               rc  = hisi_sas_slot_index_alloc(hisi_hba, scsi_cmnd);
> > -     }
> >       if (rc < 0)
> >               goto err_out_dif_dma_unmap;
> >
> > diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
> > index 49620c2411df..063e50e5b30c 100644
> > --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
> > +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
> > @@ -2344,30 +2344,6 @@ static irqreturn_t cq_interrupt_v3_hw(int irq_no, void *p)
> >       return IRQ_HANDLED;
> >  }
> >
> > -static void setup_reply_map_v3_hw(struct hisi_hba *hisi_hba, int nvecs)
> > -{
> > -     const struct cpumask *mask;
> > -     int queue, cpu;
> > -
> > -     for (queue = 0; queue < nvecs; queue++) {
> > -             struct hisi_sas_cq *cq = &hisi_hba->cq[queue];
> > -
> > -             mask = pci_irq_get_affinity(hisi_hba->pci_dev, queue +
> > -                                         BASE_VECTORS_V3_HW);
> > -             if (!mask)
> > -                     goto fallback;
> > -             cq->pci_irq_mask = mask;
> > -             for_each_cpu(cpu, mask)
> > -                     hisi_hba->reply_map[cpu] = queue;
> > -     }
> > -     return;
> > -
> > -fallback:
> > -     for_each_possible_cpu(cpu)
> > -             hisi_hba->reply_map[cpu] = cpu % hisi_hba->queue_count;
> > -     /* Don't clean all CQ masks */
> > -}
> > -
> >  static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
> >  {
> >       struct device *dev = hisi_hba->dev;
> > @@ -2383,11 +2359,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
> >
> >               min_msi = MIN_AFFINE_VECTORS_V3_HW;
> >
> > -             hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids,
> > -                                                sizeof(unsigned int),
> > -                                                GFP_KERNEL);
> > -             if (!hisi_hba->reply_map)
> > -                     return -ENOMEM;
> >               vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev,
> >                                                        min_msi, max_msi,
> >                                                        PCI_IRQ_MSI |
> > @@ -2395,7 +2366,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
> >                                                        &desc);
> >               if (vectors < 0)
> >                       return -ENOENT;
> > -             setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW);
> >       } else {
> >               min_msi = max_msi;
> >               vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi,
> > @@ -2896,6 +2866,18 @@ static void debugfs_snapshot_restore_v3_hw(struct hisi_hba *hisi_hba)
> >       clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
> >  }
> >
> > +static int hisi_sas_map_queues(struct Scsi_Host *shost)
> > +{
> > +     struct hisi_hba *hisi_hba = shost_priv(shost);
> > +     struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
> > +
> > +     if (auto_affine_msi_experimental)
> > +             return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev,
> > +                             BASE_VECTORS_V3_HW);
> > +     else
> > +             return blk_mq_map_queues(qmap);
> > +}
> > +
> >  static struct scsi_host_template sht_v3_hw = {
> >       .name                   = DRV_NAME,
> >       .module                 = THIS_MODULE,
>
> As mentioned, we should be using a common function here.
>
> > @@ -2906,6 +2888,8 @@ static struct scsi_host_template sht_v3_hw = {
> >       .scan_start             = hisi_sas_scan_start,
> >       .change_queue_depth     = sas_change_queue_depth,
> >       .bios_param             = sas_bios_param,
> > +     .map_queues             = hisi_sas_map_queues,
> > +     .host_tagset            = 1,
> >       .this_id                = -1,
> >       .sg_tablesize           = HISI_SAS_SGE_PAGE_CNT,
> >       .sg_prot_tablesize      = HISI_SAS_SGE_PAGE_CNT,
> > @@ -3092,6 +3076,8 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >       if (hisi_sas_debugfs_enable)
> >               hisi_sas_debugfs_init(hisi_hba);
> >
> > +     shost->nr_hw_queues = hisi_hba->cq_nvecs;
> > +
> >       rc = scsi_add_host(shost, dev);
> >       if (rc)
> >               goto err_out_ha;
> >
> Well, I'd rather see the v3 hardware converted to 'real' blk-mq first;
> the hardware itself is pretty much multiqueue already, so we should be
> better off converting it to blk-mq.

According to John Garry's input, the tags are still hostwide, so I am not
sure how to partition the hostwide tags into each hw queue's tags. That can
be quite hard to do if the queue depth isn't big enough.

Thanks,
Ming Lei
Hannes Reinecke May 31, 2019, 6:42 a.m. UTC | #3
On 5/31/19 8:34 AM, Ming Lei wrote:
> On Fri, May 31, 2019 at 2:21 PM Hannes Reinecke <hare@suse.de> wrote:
>>
>> On 5/31/19 4:27 AM, Ming Lei wrote:
>>> SCSI's reply qeueue is very similar with blk-mq's hw queue, both
>>> assigned by IRQ vector, so map te private reply queue into blk-mq's hw
>>> queue via .host_tagset.
>>>
>>> Then the private reply mapping can be removed.
>>>
>>> Another benefit is that the request/irq lost issue may be solved in
>>> generic approach because managed IRQ may be shutdown during CPU
>>> hotplug.
>>>
>>> Signed-off-by: Ming Lei <ming.lei@redhat.com>
>>> ---
>>>  drivers/scsi/hisi_sas/hisi_sas.h       |  2 +-
>>>  drivers/scsi/hisi_sas/hisi_sas_main.c  | 36 ++++++++++----------
>>>  drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 46 +++++++++-----------------
>>>  3 files changed, 36 insertions(+), 48 deletions(-)
>>>
>>> diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
>>> index fc87994b5d73..3d48848dbde7 100644
>>> --- a/drivers/scsi/hisi_sas/hisi_sas.h
>>> +++ b/drivers/scsi/hisi_sas/hisi_sas.h
>>> @@ -26,6 +26,7 @@
>>>  #include <linux/platform_device.h>
>>>  #include <linux/property.h>
>>>  #include <linux/regmap.h>
>>> +#include <linux/blk-mq-pci.h>
>>>  #include <scsi/sas_ata.h>
>>>  #include <scsi/libsas.h>
>>>
>>> @@ -378,7 +379,6 @@ struct hisi_hba {
>>>       u32 intr_coal_count;    /* Interrupt count to coalesce */
>>>
>>>       int cq_nvecs;
>>> -     unsigned int *reply_map;
>>>
>>>       /* debugfs memories */
>>>       u32 *debugfs_global_reg;
>>> diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
>>> index 8a7feb8ed8d6..a1c1f30b9fdb 100644
>>> --- a/drivers/scsi/hisi_sas/hisi_sas_main.c
>>> +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
>>> @@ -441,6 +441,19 @@ static int hisi_sas_dif_dma_map(struct hisi_hba *hisi_hba,
>>>       return rc;
>>>  }
>>>
>>> +static struct scsi_cmnd *sas_task_to_scsi_cmd(struct sas_task *task)
>>> +{
>>> +     if (!task->uldd_task)
>>> +             return NULL;
>>> +
>>> +     if (dev_is_sata(task->dev)) {
>>> +             struct ata_queued_cmd *qc = task->uldd_task;
>>> +             return qc->scsicmd;
>>> +     } else {
>>> +             return task->uldd_task;
>>> +     }
>>> +}
>>> +
>>>  static int hisi_sas_task_prep(struct sas_task *task,
>>>                             struct hisi_sas_dq **dq_pointer,
>>>                             bool is_tmf, struct hisi_sas_tmf_task *tmf,
>>> @@ -459,6 +472,7 @@ static int hisi_sas_task_prep(struct sas_task *task,
>>>       struct hisi_sas_dq *dq;
>>>       unsigned long flags;
>>>       int wr_q_index;
>>> +     struct scsi_cmnd *scsi_cmnd;
>>>
>>>       if (DEV_IS_GONE(sas_dev)) {
>>>               if (sas_dev)
>>> @@ -471,9 +485,10 @@ static int hisi_sas_task_prep(struct sas_task *task,
>>>               return -ECOMM;
>>>       }
>>>
>>> -     if (hisi_hba->reply_map) {
>>> -             int cpu = raw_smp_processor_id();
>>> -             unsigned int dq_index = hisi_hba->reply_map[cpu];
>>> +     scsi_cmnd = sas_task_to_scsi_cmd(task);
>>> +     if (hisi_hba->shost->hostt->host_tagset) {
>>> +             unsigned int dq_index = scsi_cmnd_hctx_index(
>>> +                             hisi_hba->shost, scsi_cmnd);
>>>
>>>               *dq_pointer = dq = &hisi_hba->dq[dq_index];
>>>       } else {
>>> @@ -503,21 +518,8 @@ static int hisi_sas_task_prep(struct sas_task *task,
>>>
>>>       if (hisi_hba->hw->slot_index_alloc)
>>>               rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device);
>>> -     else {
>>> -             struct scsi_cmnd *scsi_cmnd = NULL;
>>> -
>>> -             if (task->uldd_task) {
>>> -                     struct ata_queued_cmd *qc;
>>> -
>>> -                     if (dev_is_sata(device)) {
>>> -                             qc = task->uldd_task;
>>> -                             scsi_cmnd = qc->scsicmd;
>>> -                     } else {
>>> -                             scsi_cmnd = task->uldd_task;
>>> -                     }
>>> -             }
>>> +     else
>>>               rc  = hisi_sas_slot_index_alloc(hisi_hba, scsi_cmnd);
>>> -     }
>>>       if (rc < 0)
>>>               goto err_out_dif_dma_unmap;
>>>
>>> diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
>>> index 49620c2411df..063e50e5b30c 100644
>>> --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
>>> +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
>>> @@ -2344,30 +2344,6 @@ static irqreturn_t cq_interrupt_v3_hw(int irq_no, void *p)
>>>       return IRQ_HANDLED;
>>>  }
>>>
>>> -static void setup_reply_map_v3_hw(struct hisi_hba *hisi_hba, int nvecs)
>>> -{
>>> -     const struct cpumask *mask;
>>> -     int queue, cpu;
>>> -
>>> -     for (queue = 0; queue < nvecs; queue++) {
>>> -             struct hisi_sas_cq *cq = &hisi_hba->cq[queue];
>>> -
>>> -             mask = pci_irq_get_affinity(hisi_hba->pci_dev, queue +
>>> -                                         BASE_VECTORS_V3_HW);
>>> -             if (!mask)
>>> -                     goto fallback;
>>> -             cq->pci_irq_mask = mask;
>>> -             for_each_cpu(cpu, mask)
>>> -                     hisi_hba->reply_map[cpu] = queue;
>>> -     }
>>> -     return;
>>> -
>>> -fallback:
>>> -     for_each_possible_cpu(cpu)
>>> -             hisi_hba->reply_map[cpu] = cpu % hisi_hba->queue_count;
>>> -     /* Don't clean all CQ masks */
>>> -}
>>> -
>>>  static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
>>>  {
>>>       struct device *dev = hisi_hba->dev;
>>> @@ -2383,11 +2359,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
>>>
>>>               min_msi = MIN_AFFINE_VECTORS_V3_HW;
>>>
>>> -             hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids,
>>> -                                                sizeof(unsigned int),
>>> -                                                GFP_KERNEL);
>>> -             if (!hisi_hba->reply_map)
>>> -                     return -ENOMEM;
>>>               vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev,
>>>                                                        min_msi, max_msi,
>>>                                                        PCI_IRQ_MSI |
>>> @@ -2395,7 +2366,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
>>>                                                        &desc);
>>>               if (vectors < 0)
>>>                       return -ENOENT;
>>> -             setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW);
>>>       } else {
>>>               min_msi = max_msi;
>>>               vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi,
>>> @@ -2896,6 +2866,18 @@ static void debugfs_snapshot_restore_v3_hw(struct hisi_hba *hisi_hba)
>>>       clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
>>>  }
>>>
>>> +static int hisi_sas_map_queues(struct Scsi_Host *shost)
>>> +{
>>> +     struct hisi_hba *hisi_hba = shost_priv(shost);
>>> +     struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
>>> +
>>> +     if (auto_affine_msi_experimental)
>>> +             return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev,
>>> +                             BASE_VECTORS_V3_HW);
>>> +     else
>>> +             return blk_mq_map_queues(qmap);
>>> +}
>>> +
>>>  static struct scsi_host_template sht_v3_hw = {
>>>       .name                   = DRV_NAME,
>>>       .module                 = THIS_MODULE,
>>
>> As mentioned, we should be using a common function here.
>>
>>> @@ -2906,6 +2888,8 @@ static struct scsi_host_template sht_v3_hw = {
>>>       .scan_start             = hisi_sas_scan_start,
>>>       .change_queue_depth     = sas_change_queue_depth,
>>>       .bios_param             = sas_bios_param,
>>> +     .map_queues             = hisi_sas_map_queues,
>>> +     .host_tagset            = 1,
>>>       .this_id                = -1,
>>>       .sg_tablesize           = HISI_SAS_SGE_PAGE_CNT,
>>>       .sg_prot_tablesize      = HISI_SAS_SGE_PAGE_CNT,
>>> @@ -3092,6 +3076,8 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>       if (hisi_sas_debugfs_enable)
>>>               hisi_sas_debugfs_init(hisi_hba);
>>>
>>> +     shost->nr_hw_queues = hisi_hba->cq_nvecs;
>>> +
>>>       rc = scsi_add_host(shost, dev);
>>>       if (rc)
>>>               goto err_out_ha;
>>>
>> Well, I'd rather see the v3 hardware converted to 'real' blk-mq first;
>> the hardware itself is pretty much multiqueue already, so we should be
>> better off converting it to blk-mq.
> 
> From John Garry's input, the tags is still hostwide, then not sure how to
> partition the hostwide tags into each hw queue's tags. That can be quite
> hard to do if the queue depth isn't big enough.
> 
Shouldn't be much of an issue; the conversion to blk-mq would still be
using a host-wide tag map.
Problem is more the 'v2' hardware, which has some pretty dodgy hardware
limitations. But I'll be looking into it and will be posting a patch.

Cheers,

Hannes
Ming Lei May 31, 2019, 7:14 a.m. UTC | #4
On Fri, May 31, 2019 at 2:42 PM Hannes Reinecke <hare@suse.de> wrote:
>
> On 5/31/19 8:34 AM, Ming Lei wrote:
> > On Fri, May 31, 2019 at 2:21 PM Hannes Reinecke <hare@suse.de> wrote:
> >>
> >> On 5/31/19 4:27 AM, Ming Lei wrote:
> >>> SCSI's reply qeueue is very similar with blk-mq's hw queue, both
> >>> assigned by IRQ vector, so map te private reply queue into blk-mq's hw
> >>> queue via .host_tagset.
> >>>
> >>> Then the private reply mapping can be removed.
> >>>
> >>> Another benefit is that the request/irq lost issue may be solved in
> >>> generic approach because managed IRQ may be shutdown during CPU
> >>> hotplug.
> >>>
> >>> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> >>> ---
> >>>  drivers/scsi/hisi_sas/hisi_sas.h       |  2 +-
> >>>  drivers/scsi/hisi_sas/hisi_sas_main.c  | 36 ++++++++++----------
> >>>  drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 46 +++++++++-----------------
> >>>  3 files changed, 36 insertions(+), 48 deletions(-)
> >>>
> >>> diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
> >>> index fc87994b5d73..3d48848dbde7 100644
> >>> --- a/drivers/scsi/hisi_sas/hisi_sas.h
> >>> +++ b/drivers/scsi/hisi_sas/hisi_sas.h
> >>> @@ -26,6 +26,7 @@
> >>>  #include <linux/platform_device.h>
> >>>  #include <linux/property.h>
> >>>  #include <linux/regmap.h>
> >>> +#include <linux/blk-mq-pci.h>
> >>>  #include <scsi/sas_ata.h>
> >>>  #include <scsi/libsas.h>
> >>>
> >>> @@ -378,7 +379,6 @@ struct hisi_hba {
> >>>       u32 intr_coal_count;    /* Interrupt count to coalesce */
> >>>
> >>>       int cq_nvecs;
> >>> -     unsigned int *reply_map;
> >>>
> >>>       /* debugfs memories */
> >>>       u32 *debugfs_global_reg;
> >>> diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
> >>> index 8a7feb8ed8d6..a1c1f30b9fdb 100644
> >>> --- a/drivers/scsi/hisi_sas/hisi_sas_main.c
> >>> +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
> >>> @@ -441,6 +441,19 @@ static int hisi_sas_dif_dma_map(struct hisi_hba *hisi_hba,
> >>>       return rc;
> >>>  }
> >>>
> >>> +static struct scsi_cmnd *sas_task_to_scsi_cmd(struct sas_task *task)
> >>> +{
> >>> +     if (!task->uldd_task)
> >>> +             return NULL;
> >>> +
> >>> +     if (dev_is_sata(task->dev)) {
> >>> +             struct ata_queued_cmd *qc = task->uldd_task;
> >>> +             return qc->scsicmd;
> >>> +     } else {
> >>> +             return task->uldd_task;
> >>> +     }
> >>> +}
> >>> +
> >>>  static int hisi_sas_task_prep(struct sas_task *task,
> >>>                             struct hisi_sas_dq **dq_pointer,
> >>>                             bool is_tmf, struct hisi_sas_tmf_task *tmf,
> >>> @@ -459,6 +472,7 @@ static int hisi_sas_task_prep(struct sas_task *task,
> >>>       struct hisi_sas_dq *dq;
> >>>       unsigned long flags;
> >>>       int wr_q_index;
> >>> +     struct scsi_cmnd *scsi_cmnd;
> >>>
> >>>       if (DEV_IS_GONE(sas_dev)) {
> >>>               if (sas_dev)
> >>> @@ -471,9 +485,10 @@ static int hisi_sas_task_prep(struct sas_task *task,
> >>>               return -ECOMM;
> >>>       }
> >>>
> >>> -     if (hisi_hba->reply_map) {
> >>> -             int cpu = raw_smp_processor_id();
> >>> -             unsigned int dq_index = hisi_hba->reply_map[cpu];
> >>> +     scsi_cmnd = sas_task_to_scsi_cmd(task);
> >>> +     if (hisi_hba->shost->hostt->host_tagset) {
> >>> +             unsigned int dq_index = scsi_cmnd_hctx_index(
> >>> +                             hisi_hba->shost, scsi_cmnd);
> >>>
> >>>               *dq_pointer = dq = &hisi_hba->dq[dq_index];
> >>>       } else {
> >>> @@ -503,21 +518,8 @@ static int hisi_sas_task_prep(struct sas_task *task,
> >>>
> >>>       if (hisi_hba->hw->slot_index_alloc)
> >>>               rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device);
> >>> -     else {
> >>> -             struct scsi_cmnd *scsi_cmnd = NULL;
> >>> -
> >>> -             if (task->uldd_task) {
> >>> -                     struct ata_queued_cmd *qc;
> >>> -
> >>> -                     if (dev_is_sata(device)) {
> >>> -                             qc = task->uldd_task;
> >>> -                             scsi_cmnd = qc->scsicmd;
> >>> -                     } else {
> >>> -                             scsi_cmnd = task->uldd_task;
> >>> -                     }
> >>> -             }
> >>> +     else
> >>>               rc  = hisi_sas_slot_index_alloc(hisi_hba, scsi_cmnd);
> >>> -     }
> >>>       if (rc < 0)
> >>>               goto err_out_dif_dma_unmap;
> >>>
> >>> diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
> >>> index 49620c2411df..063e50e5b30c 100644
> >>> --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
> >>> +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
> >>> @@ -2344,30 +2344,6 @@ static irqreturn_t cq_interrupt_v3_hw(int irq_no, void *p)
> >>>       return IRQ_HANDLED;
> >>>  }
> >>>
> >>> -static void setup_reply_map_v3_hw(struct hisi_hba *hisi_hba, int nvecs)
> >>> -{
> >>> -     const struct cpumask *mask;
> >>> -     int queue, cpu;
> >>> -
> >>> -     for (queue = 0; queue < nvecs; queue++) {
> >>> -             struct hisi_sas_cq *cq = &hisi_hba->cq[queue];
> >>> -
> >>> -             mask = pci_irq_get_affinity(hisi_hba->pci_dev, queue +
> >>> -                                         BASE_VECTORS_V3_HW);
> >>> -             if (!mask)
> >>> -                     goto fallback;
> >>> -             cq->pci_irq_mask = mask;
> >>> -             for_each_cpu(cpu, mask)
> >>> -                     hisi_hba->reply_map[cpu] = queue;
> >>> -     }
> >>> -     return;
> >>> -
> >>> -fallback:
> >>> -     for_each_possible_cpu(cpu)
> >>> -             hisi_hba->reply_map[cpu] = cpu % hisi_hba->queue_count;
> >>> -     /* Don't clean all CQ masks */
> >>> -}
> >>> -
> >>>  static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
> >>>  {
> >>>       struct device *dev = hisi_hba->dev;
> >>> @@ -2383,11 +2359,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
> >>>
> >>>               min_msi = MIN_AFFINE_VECTORS_V3_HW;
> >>>
> >>> -             hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids,
> >>> -                                                sizeof(unsigned int),
> >>> -                                                GFP_KERNEL);
> >>> -             if (!hisi_hba->reply_map)
> >>> -                     return -ENOMEM;
> >>>               vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev,
> >>>                                                        min_msi, max_msi,
> >>>                                                        PCI_IRQ_MSI |
> >>> @@ -2395,7 +2366,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
> >>>                                                        &desc);
> >>>               if (vectors < 0)
> >>>                       return -ENOENT;
> >>> -             setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW);
> >>>       } else {
> >>>               min_msi = max_msi;
> >>>               vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi,
> >>> @@ -2896,6 +2866,18 @@ static void debugfs_snapshot_restore_v3_hw(struct hisi_hba *hisi_hba)
> >>>       clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
> >>>  }
> >>>
> >>> +static int hisi_sas_map_queues(struct Scsi_Host *shost)
> >>> +{
> >>> +     struct hisi_hba *hisi_hba = shost_priv(shost);
> >>> +     struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
> >>> +
> >>> +     if (auto_affine_msi_experimental)
> >>> +             return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev,
> >>> +                             BASE_VECTORS_V3_HW);
> >>> +     else
> >>> +             return blk_mq_map_queues(qmap);
> >>> +}
> >>> +
> >>>  static struct scsi_host_template sht_v3_hw = {
> >>>       .name                   = DRV_NAME,
> >>>       .module                 = THIS_MODULE,
> >>
> >> As mentioned, we should be using a common function here.
> >>
> >>> @@ -2906,6 +2888,8 @@ static struct scsi_host_template sht_v3_hw = {
> >>>       .scan_start             = hisi_sas_scan_start,
> >>>       .change_queue_depth     = sas_change_queue_depth,
> >>>       .bios_param             = sas_bios_param,
> >>> +     .map_queues             = hisi_sas_map_queues,
> >>> +     .host_tagset            = 1,
> >>>       .this_id                = -1,
> >>>       .sg_tablesize           = HISI_SAS_SGE_PAGE_CNT,
> >>>       .sg_prot_tablesize      = HISI_SAS_SGE_PAGE_CNT,
> >>> @@ -3092,6 +3076,8 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >>>       if (hisi_sas_debugfs_enable)
> >>>               hisi_sas_debugfs_init(hisi_hba);
> >>>
> >>> +     shost->nr_hw_queues = hisi_hba->cq_nvecs;
> >>> +
> >>>       rc = scsi_add_host(shost, dev);
> >>>       if (rc)
> >>>               goto err_out_ha;
> >>>
> >> Well, I'd rather see the v3 hardware converted to 'real' blk-mq first;
> >> the hardware itself is pretty much multiqueue already, so we should be
> >> better off converting it to blk-mq.
> >
> > From John Garry's input, the tags is still hostwide, then not sure how to
> > partition the hostwide tags into each hw queue's tags. That can be quite
> > hard to do if the queue depth isn't big enough.
> >
> Shouldn't be much of an issue; the conversion to blk-mq would still be
> using a host-wide tag map.

Could you explain a bit more? That is exactly what this patch is doing
(exposing MQ on a host-wide tag set).


Thanks,
Ming Lei
John Garry May 31, 2019, 11:38 a.m. UTC | #5
>>> -fallback:
>>> -     for_each_possible_cpu(cpu)
>>> -             hisi_hba->reply_map[cpu] = cpu % hisi_hba->queue_count;
>>> -     /* Don't clean all CQ masks */
>>> -}
>>> -
>>>  static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
>>>  {
>>>       struct device *dev = hisi_hba->dev;
>>> @@ -2383,11 +2359,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
>>>
>>>               min_msi = MIN_AFFINE_VECTORS_V3_HW;
>>>
>>> -             hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids,
>>> -                                                sizeof(unsigned int),
>>> -                                                GFP_KERNEL);
>>> -             if (!hisi_hba->reply_map)
>>> -                     return -ENOMEM;
>>>               vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev,
>>>                                                        min_msi, max_msi,
>>>                                                        PCI_IRQ_MSI |
>>> @@ -2395,7 +2366,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
>>>                                                        &desc);
>>>               if (vectors < 0)
>>>                       return -ENOENT;
>>> -             setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW);
>>>       } else {
>>>               min_msi = max_msi;
>>>               vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi,
>>> @@ -2896,6 +2866,18 @@ static void debugfs_snapshot_restore_v3_hw(struct hisi_hba *hisi_hba)
>>>       clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
>>>  }
>>>
>>> +static int hisi_sas_map_queues(struct Scsi_Host *shost)
>>> +{
>>> +     struct hisi_hba *hisi_hba = shost_priv(shost);
>>> +     struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
>>> +
>>> +     if (auto_affine_msi_experimental)
>>> +             return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev,
>>> +                             BASE_VECTORS_V3_HW);
>>> +     else
>>> +             return blk_mq_map_queues(qmap);

I don't think that the mapping which blk_mq_map_queues() creates is 
what we want. I'm guessing that we still would like a mapping similar to 
what blk_mq_pci_map_queues() produces, which is an even spread, putting 
adjacent CPUs on the same queue.

For my system with 96 cpus and 16 queues, blk_mq_map_queues() would map 
queue 0 to cpu 0, 16, 32, 48 ..., queue 1 to cpu 1, 17, 33 and so on.

>>> +}
>>> +
>>>  static struct scsi_host_template sht_v3_hw = {
>>>       .name                   = DRV_NAME,
>>>       .module                 = THIS_MODULE,
>>
>> As mentioned, we should be using a common function here.
>>
>>> @@ -2906,6 +2888,8 @@ static struct scsi_host_template sht_v3_hw = {
>>>       .scan_start             = hisi_sas_scan_start,
>>>       .change_queue_depth     = sas_change_queue_depth,
>>>       .bios_param             = sas_bios_param,
>>> +     .map_queues             = hisi_sas_map_queues,
>>> +     .host_tagset            = 1,
>>>       .this_id                = -1,
>>>       .sg_tablesize           = HISI_SAS_SGE_PAGE_CNT,
>>>       .sg_prot_tablesize      = HISI_SAS_SGE_PAGE_CNT,
>>> @@ -3092,6 +3076,8 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>       if (hisi_sas_debugfs_enable)
>>>               hisi_sas_debugfs_init(hisi_hba);
>>>
>>> +     shost->nr_hw_queues = hisi_hba->cq_nvecs;

There's an ordering issue here, which can be fixed without too much trouble.

Value hisi_hba->cq_nvecs is not set until after this point, in 
hisi_sas_v3_probe()->hw->hw_init->hisi_sas_v3_init()->interrupt_init_v3_hw() 


Please see revised patch, below.


>>> +
>>>       rc = scsi_add_host(shost, dev);
>>>       if (rc)
>>>               goto err_out_ha;
>>>
>> Well, I'd rather see the v3 hardware converted to 'real' blk-mq first;
>> the hardware itself is pretty much multiqueue already, so we should be
>> better off converting it to blk-mq.
>
>>From John Garry's input, the tags is still hostwide, then not sure how to
> partition the hostwide tags into each hw queue's tags. That can be quite
> hard to do if the queue depth isn't big enough.

JFYI, there is no limitation on which command tags can be used on which queue.

And, as I mentioned in response to "hisi_sas_v3: multiqueue support", 
the hw queue depth is configurable, and we make it the same value as the 
max number of command tags, that being 4096.

>
> Thanks,
> Ming Lei
>
> .
>

Thanks,
John

 From b3c4ded715e1a7282f59fbd216bd2f0e852986aa Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 31 May 2019 10:27:59 +0800
Subject: [PATCH] scsi: hisi_sas_v3: convert private reply queue to blk-mq hw
  queue

SCSI's reply queue is very similar to blk-mq's hw queue, both
assigned by IRQ vector, so map the private reply queue into blk-mq's hw
queue via .host_tagset.

Then the private reply mapping can be removed.

Another benefit is that the request/irq lost issue may be solved in
a generic way, because a managed IRQ may be shut down during CPU
hotplug.

Signed-off-by: Ming Lei <ming.lei@redhat.com>

diff --git a/drivers/scsi/hisi_sas/hisi_sas.h 
b/drivers/scsi/hisi_sas/hisi_sas.h
index fc87994b5d73..3d48848dbde7 100644
--- a/drivers/scsi/hisi_sas/hisi_sas.h
+++ b/drivers/scsi/hisi_sas/hisi_sas.h
@@ -26,6 +26,7 @@
  #include <linux/platform_device.h>
  #include <linux/property.h>
  #include <linux/regmap.h>
+#include <linux/blk-mq-pci.h>
  #include <scsi/sas_ata.h>
  #include <scsi/libsas.h>

@@ -378,7 +379,6 @@ struct hisi_hba {
  	u32 intr_coal_count;	/* Interrupt count to coalesce */

  	int cq_nvecs;
-	unsigned int *reply_map;

  	/* debugfs memories */
  	u32 *debugfs_global_reg;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c 
b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 8a7feb8ed8d6..a1c1f30b9fdb 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -441,6 +441,19 @@ static int hisi_sas_dif_dma_map(struct hisi_hba 
*hisi_hba,
  	return rc;
  }

+static struct scsi_cmnd *sas_task_to_scsi_cmd(struct sas_task *task)
+{
+	if (!task->uldd_task)
+		return NULL;
+
+	if (dev_is_sata(task->dev)) {
+		struct ata_queued_cmd *qc = task->uldd_task;
+		return qc->scsicmd;
+	} else {
+		return task->uldd_task;
+	}
+}
+
  static int hisi_sas_task_prep(struct sas_task *task,
  			      struct hisi_sas_dq **dq_pointer,
  			      bool is_tmf, struct hisi_sas_tmf_task *tmf,
@@ -459,6 +472,7 @@ static int hisi_sas_task_prep(struct sas_task *task,
  	struct hisi_sas_dq *dq;
  	unsigned long flags;
  	int wr_q_index;
+	struct scsi_cmnd *scsi_cmnd;

  	if (DEV_IS_GONE(sas_dev)) {
  		if (sas_dev)
@@ -471,9 +485,10 @@ static int hisi_sas_task_prep(struct sas_task *task,
  		return -ECOMM;
  	}

-	if (hisi_hba->reply_map) {
-		int cpu = raw_smp_processor_id();
-		unsigned int dq_index = hisi_hba->reply_map[cpu];
+	scsi_cmnd = sas_task_to_scsi_cmd(task);
+	if (hisi_hba->shost->hostt->host_tagset) {
+		unsigned int dq_index = scsi_cmnd_hctx_index(
+				hisi_hba->shost, scsi_cmnd);

  		*dq_pointer = dq = &hisi_hba->dq[dq_index];
  	} else {
@@ -503,21 +518,8 @@ static int hisi_sas_task_prep(struct sas_task *task,

  	if (hisi_hba->hw->slot_index_alloc)
  		rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device);
-	else {
-		struct scsi_cmnd *scsi_cmnd = NULL;
-
-		if (task->uldd_task) {
-			struct ata_queued_cmd *qc;
-
-			if (dev_is_sata(device)) {
-				qc = task->uldd_task;
-				scsi_cmnd = qc->scsicmd;
-			} else {
-				scsi_cmnd = task->uldd_task;
-			}
-		}
+	else
  		rc  = hisi_sas_slot_index_alloc(hisi_hba, scsi_cmnd);
-	}
  	if (rc < 0)
  		goto err_out_dif_dma_unmap;

diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c 
b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index 49620c2411df..0aa750cbefb3 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -2344,36 +2344,9 @@ static irqreturn_t cq_interrupt_v3_hw(int irq_no, 
void *p)
  	return IRQ_HANDLED;
  }

-static void setup_reply_map_v3_hw(struct hisi_hba *hisi_hba, int nvecs)
+static int interrupt_pre_init_v3_hw(struct hisi_hba *hisi_hba)
  {
-	const struct cpumask *mask;
-	int queue, cpu;
-
-	for (queue = 0; queue < nvecs; queue++) {
-		struct hisi_sas_cq *cq = &hisi_hba->cq[queue];
-
-		mask = pci_irq_get_affinity(hisi_hba->pci_dev, queue +
-					    BASE_VECTORS_V3_HW);
-		if (!mask)
-			goto fallback;
-		cq->pci_irq_mask = mask;
-		for_each_cpu(cpu, mask)
-			hisi_hba->reply_map[cpu] = queue;
-	}
-	return;
-
-fallback:
-	for_each_possible_cpu(cpu)
-		hisi_hba->reply_map[cpu] = cpu % hisi_hba->queue_count;
-	/* Don't clean all CQ masks */
-}
-
-static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
-{
-	struct device *dev = hisi_hba->dev;
-	struct pci_dev *pdev = hisi_hba->pci_dev;
-	int vectors, rc;
-	int i, k;
+	int vectors;
  	int max_msi = HISI_SAS_MSI_COUNT_V3_HW, min_msi;

  	if (auto_affine_msi_experimental) {
@@ -2383,11 +2356,6 @@ static int interrupt_init_v3_hw(struct hisi_hba 
*hisi_hba)

  		min_msi = MIN_AFFINE_VECTORS_V3_HW;

-		hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids,
-						   sizeof(unsigned int),
-						   GFP_KERNEL);
-		if (!hisi_hba->reply_map)
-			return -ENOMEM;
  		vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev,
  							 min_msi, max_msi,
  							 PCI_IRQ_MSI |
@@ -2395,7 +2363,6 @@ static int interrupt_init_v3_hw(struct hisi_hba 
*hisi_hba)
  							 &desc);
  		if (vectors < 0)
  			return -ENOENT;
-		setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW);
  	} else {
  		min_msi = max_msi;
  		vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi,
@@ -2403,16 +2370,25 @@ static int interrupt_init_v3_hw(struct hisi_hba 
*hisi_hba)
  		if (vectors < 0)
  			return vectors;
  	}
-
  	hisi_hba->cq_nvecs = vectors - BASE_VECTORS_V3_HW;

+	return 0;
+}
+
+static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
+{
+	struct device *dev = hisi_hba->dev;
+	struct pci_dev *pdev = hisi_hba->pci_dev;
+	int rc, i, k;
+
+	dev_err(dev,  "%s hisi_hba->cq_nvecs=%d\n", __func__, hisi_hba->cq_nvecs);
+
  	rc = devm_request_irq(dev, pci_irq_vector(pdev, 1),
  			      int_phy_up_down_bcast_v3_hw, 0,
  			      DRV_NAME " phy", hisi_hba);
  	if (rc) {
  		dev_err(dev, "could not request phy interrupt, rc=%d\n", rc);
-		rc = -ENOENT;
-		goto free_irq_vectors;
+		return -ENOENT;
  	}

  	rc = devm_request_irq(dev, pci_irq_vector(pdev, 2),
@@ -2467,8 +2443,6 @@ static int interrupt_init_v3_hw(struct hisi_hba 
*hisi_hba)
  	free_irq(pci_irq_vector(pdev, 2), hisi_hba);
  free_phy_irq:
  	free_irq(pci_irq_vector(pdev, 1), hisi_hba);
-free_irq_vectors:
-	pci_free_irq_vectors(pdev);
  	return rc;
  }

@@ -2896,6 +2870,18 @@ static void debugfs_snapshot_restore_v3_hw(struct 
hisi_hba *hisi_hba)
  	clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
  }

+static int hisi_sas_map_queues(struct Scsi_Host *shost)
+{
+	struct hisi_hba *hisi_hba = shost_priv(shost);
+	struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
+
+	if (auto_affine_msi_experimental)
+		return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev,
+				BASE_VECTORS_V3_HW);
+	else
+		return blk_mq_map_queues(qmap);
+}
+
  static struct scsi_host_template sht_v3_hw = {
  	.name			= DRV_NAME,
  	.module			= THIS_MODULE,
@@ -2906,6 +2892,8 @@ static struct scsi_host_template sht_v3_hw = {
  	.scan_start		= hisi_sas_scan_start,
  	.change_queue_depth	= sas_change_queue_depth,
  	.bios_param		= sas_bios_param,
+	.map_queues		= hisi_sas_map_queues,
+	.host_tagset		= 1,
  	.this_id		= -1,
  	.sg_tablesize		= HISI_SAS_SGE_PAGE_CNT,
  	.sg_prot_tablesize	= HISI_SAS_SGE_PAGE_CNT,
@@ -3092,15 +3080,21 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)
  	if (hisi_sas_debugfs_enable)
  		hisi_sas_debugfs_init(hisi_hba);

+
+	rc = interrupt_pre_init_v3_hw(hisi_hba);
+	if (rc < 0)
+		goto err_out_interrupts;
+	shost->nr_hw_queues = hisi_hba->cq_nvecs;
+
  	rc = scsi_add_host(shost, dev);
  	if (rc)
-		goto err_out_ha;
+		goto err_out_interrupts;

  	rc = sas_register_ha(sha);
  	if (rc)
  		goto err_out_register_ha;

-	rc = hisi_hba->hw->hw_init(hisi_hba);
+	rc = hisi_sas_v3_init(hisi_hba);
  	if (rc)
  		goto err_out_register_ha;

@@ -3110,6 +3104,8 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const 
struct pci_device_id *id)

  err_out_register_ha:
  	scsi_remove_host(shost);
+err_out_interrupts:
+	pci_free_irq_vectors(pdev);
  err_out_ha:
  	scsi_host_put(shost);
  err_out_regions:
Ming Lei June 3, 2019, 11 a.m. UTC | #6
On Fri, May 31, 2019 at 12:38:10PM +0100, John Garry wrote:
> 
> > > > -fallback:
> > > > -     for_each_possible_cpu(cpu)
> > > > -             hisi_hba->reply_map[cpu] = cpu % hisi_hba->queue_count;
> > > > -     /* Don't clean all CQ masks */
> > > > -}
> > > > -
> > > >  static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
> > > >  {
> > > >       struct device *dev = hisi_hba->dev;
> > > > @@ -2383,11 +2359,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
> > > > 
> > > >               min_msi = MIN_AFFINE_VECTORS_V3_HW;
> > > > 
> > > > -             hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids,
> > > > -                                                sizeof(unsigned int),
> > > > -                                                GFP_KERNEL);
> > > > -             if (!hisi_hba->reply_map)
> > > > -                     return -ENOMEM;
> > > >               vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev,
> > > >                                                        min_msi, max_msi,
> > > >                                                        PCI_IRQ_MSI |
> > > > @@ -2395,7 +2366,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
> > > >                                                        &desc);
> > > >               if (vectors < 0)
> > > >                       return -ENOENT;
> > > > -             setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW);
> > > >       } else {
> > > >               min_msi = max_msi;
> > > >               vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi,
> > > > @@ -2896,6 +2866,18 @@ static void debugfs_snapshot_restore_v3_hw(struct hisi_hba *hisi_hba)
> > > >       clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
> > > >  }
> > > > 
> > > > +static int hisi_sas_map_queues(struct Scsi_Host *shost)
> > > > +{
> > > > +     struct hisi_hba *hisi_hba = shost_priv(shost);
> > > > +     struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
> > > > +
> > > > +     if (auto_affine_msi_experimental)
> > > > +             return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev,
> > > > +                             BASE_VECTORS_V3_HW);
> > > > +     else
> > > > +             return blk_mq_map_queues(qmap);
> 
> I don't think that the mapping which blk_mq_map_queues() creates are not
> want we want. I'm guessing that we still would like a mapping similar to
> what blk_mq_pci_map_queues() produces, which is an even spread, putting
> adjacent CPUs on the same queue.
> 
> For my system with 96 cpus and 16 queues, blk_mq_map_queues() would map
> queue 0 to cpu 0, 16, 32, 48 ..., queue 1 to cpu 1, 17, 33 and so on.

blk_mq_map_queues() is the default or fallback mapping in case managed
IRQs aren't used. If the mapping isn't good enough, we can still improve it
in the future, and then any driver using it will benefit.

> 
> > > > +}
> > > > +
> > > >  static struct scsi_host_template sht_v3_hw = {
> > > >       .name                   = DRV_NAME,
> > > >       .module                 = THIS_MODULE,
> > > 
> > > As mentioned, we should be using a common function here.
> > > 
> > > > @@ -2906,6 +2888,8 @@ static struct scsi_host_template sht_v3_hw = {
> > > >       .scan_start             = hisi_sas_scan_start,
> > > >       .change_queue_depth     = sas_change_queue_depth,
> > > >       .bios_param             = sas_bios_param,
> > > > +     .map_queues             = hisi_sas_map_queues,
> > > > +     .host_tagset            = 1,
> > > >       .this_id                = -1,
> > > >       .sg_tablesize           = HISI_SAS_SGE_PAGE_CNT,
> > > >       .sg_prot_tablesize      = HISI_SAS_SGE_PAGE_CNT,
> > > > @@ -3092,6 +3076,8 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> > > >       if (hisi_sas_debugfs_enable)
> > > >               hisi_sas_debugfs_init(hisi_hba);
> > > > 
> > > > +     shost->nr_hw_queues = hisi_hba->cq_nvecs;
> 
> There's an ordering issue here, which can be fixed without too much trouble.
> 
> Value hisi_hba->cq_nvecs is not set until after this point, in
> hisi_sas_v3_probe()->hw->hw_init->hisi_sas_v3_init()->interrupt_init_v3_hw()
> 
> 
> Please see revised patch, below.

Good catch, will integrate it in V2.

Thanks,
Ming
John Garry June 3, 2019, 1 p.m. UTC | #7
On 03/06/2019 12:00, Ming Lei wrote:
> On Fri, May 31, 2019 at 12:38:10PM +0100, John Garry wrote:
>>
>>>>> -fallback:
>>>>> -     for_each_possible_cpu(cpu)
>>>>> -             hisi_hba->reply_map[cpu] = cpu % hisi_hba->queue_count;
>>>>> -     /* Don't clean all CQ masks */
>>>>> -}
>>>>> -
>>>>>  static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
>>>>>  {
>>>>>       struct device *dev = hisi_hba->dev;
>>>>> @@ -2383,11 +2359,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
>>>>>
>>>>>               min_msi = MIN_AFFINE_VECTORS_V3_HW;
>>>>>
>>>>> -             hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids,
>>>>> -                                                sizeof(unsigned int),
>>>>> -                                                GFP_KERNEL);
>>>>> -             if (!hisi_hba->reply_map)
>>>>> -                     return -ENOMEM;
>>>>>               vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev,
>>>>>                                                        min_msi, max_msi,
>>>>>                                                        PCI_IRQ_MSI |
>>>>> @@ -2395,7 +2366,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
>>>>>                                                        &desc);
>>>>>               if (vectors < 0)
>>>>>                       return -ENOENT;
>>>>> -             setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW);
>>>>>       } else {
>>>>>               min_msi = max_msi;
>>>>>               vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi,
>>>>> @@ -2896,6 +2866,18 @@ static void debugfs_snapshot_restore_v3_hw(struct hisi_hba *hisi_hba)
>>>>>       clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
>>>>>  }
>>>>>
>>>>> +static int hisi_sas_map_queues(struct Scsi_Host *shost)
>>>>> +{
>>>>> +     struct hisi_hba *hisi_hba = shost_priv(shost);
>>>>> +     struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
>>>>> +
>>>>> +     if (auto_affine_msi_experimental)
>>>>> +             return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev,
>>>>> +                             BASE_VECTORS_V3_HW);
>>>>> +     else
>>>>> +             return blk_mq_map_queues(qmap);
>>
>> I don't think that the mapping which blk_mq_map_queues() creates are not
>> want we want. I'm guessing that we still would like a mapping similar to
>> what blk_mq_pci_map_queues() produces, which is an even spread, putting
>> adjacent CPUs on the same queue.
>>
>> For my system with 96 cpus and 16 queues, blk_mq_map_queues() would map
>> queue 0 to cpu 0, 16, 32, 48 ..., queue 1 to cpu 1, 17, 33 and so on.
>

Hi Ming,

> blk_mq_map_queues() is the default or fallback mapping in case that managed
> irq isn't used. If the mapping isn't good enough, we still can improve it
> in future, then any driver applying it can got improved.
>

That's the right attitude. However, as I see it, we can only know the 
mapping when we know the interrupt affinity or some other mapping 
restriction or rule etc., which we don't know in this case.

For now, personally I would rather we only expose multiple queues 
when auto_affine_msi_experimental is set. I fear that we may introduce a 
performance regression for !auto_affine_msi_experimental with this 
patch. We would need to test.

Hopefully we can drop !auto_affine_msi_experimental support when CPU 
hotplug issue is resolved.

>>
>>>>> +}
>>>>> +
>>>>>  static struct scsi_host_template sht_v3_hw = {
>>>>>       .name                   = DRV_NAME,
>>>>>       .module                 = THIS_MODULE,
>>>>
>>>> As mentioned, we should be using a common function here.
>>>>
>>>>> @@ -2906,6 +2888,8 @@ static struct scsi_host_template sht_v3_hw = {
>>>>>       .scan_start             = hisi_sas_scan_start,
>>>>>       .change_queue_depth     = sas_change_queue_depth,
>>>>>       .bios_param             = sas_bios_param,
>>>>> +     .map_queues             = hisi_sas_map_queues,
>>>>> +     .host_tagset            = 1,
>>>>>       .this_id                = -1,
>>>>>       .sg_tablesize           = HISI_SAS_SGE_PAGE_CNT,
>>>>>       .sg_prot_tablesize      = HISI_SAS_SGE_PAGE_CNT,
>>>>> @@ -3092,6 +3076,8 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>>>       if (hisi_sas_debugfs_enable)
>>>>>               hisi_sas_debugfs_init(hisi_hba);
>>>>>
>>>>> +     shost->nr_hw_queues = hisi_hba->cq_nvecs;
>>
>> There's an ordering issue here, which can be fixed without too much trouble.
>>
>> Value hisi_hba->cq_nvecs is not set until after this point, in
>> hisi_sas_v3_probe()->hw->hw_init->hisi_sas_v3_init()->interrupt_init_v3_hw()
>>
>>
>> Please see revised patch, below.
>
> Good catch, will integrate it in V2.
>

Thanks!

> Thanks,
> Ming
>
> .
>
Ming Lei June 4, 2019, 1:37 p.m. UTC | #8
On Mon, Jun 03, 2019 at 02:00:19PM +0100, John Garry wrote:
> On 03/06/2019 12:00, Ming Lei wrote:
> > On Fri, May 31, 2019 at 12:38:10PM +0100, John Garry wrote:
> > > 
> > > > > > -fallback:
> > > > > > -     for_each_possible_cpu(cpu)
> > > > > > -             hisi_hba->reply_map[cpu] = cpu % hisi_hba->queue_count;
> > > > > > -     /* Don't clean all CQ masks */
> > > > > > -}
> > > > > > -
> > > > > >  static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
> > > > > >  {
> > > > > >       struct device *dev = hisi_hba->dev;
> > > > > > @@ -2383,11 +2359,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
> > > > > > 
> > > > > >               min_msi = MIN_AFFINE_VECTORS_V3_HW;
> > > > > > 
> > > > > > -             hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids,
> > > > > > -                                                sizeof(unsigned int),
> > > > > > -                                                GFP_KERNEL);
> > > > > > -             if (!hisi_hba->reply_map)
> > > > > > -                     return -ENOMEM;
> > > > > >               vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev,
> > > > > >                                                        min_msi, max_msi,
> > > > > >                                                        PCI_IRQ_MSI |
> > > > > > @@ -2395,7 +2366,6 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
> > > > > >                                                        &desc);
> > > > > >               if (vectors < 0)
> > > > > >                       return -ENOENT;
> > > > > > -             setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW);
> > > > > >       } else {
> > > > > >               min_msi = max_msi;
> > > > > >               vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi,
> > > > > > @@ -2896,6 +2866,18 @@ static void debugfs_snapshot_restore_v3_hw(struct hisi_hba *hisi_hba)
> > > > > >       clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
> > > > > >  }
> > > > > > 
> > > > > > +static int hisi_sas_map_queues(struct Scsi_Host *shost)
> > > > > > +{
> > > > > > +     struct hisi_hba *hisi_hba = shost_priv(shost);
> > > > > > +     struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
> > > > > > +
> > > > > > +     if (auto_affine_msi_experimental)
> > > > > > +             return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev,
> > > > > > +                             BASE_VECTORS_V3_HW);
> > > > > > +     else
> > > > > > +             return blk_mq_map_queues(qmap);
> > > 
> > > I don't think that the mapping which blk_mq_map_queues() creates are not
> > > want we want. I'm guessing that we still would like a mapping similar to
> > > what blk_mq_pci_map_queues() produces, which is an even spread, putting
> > > adjacent CPUs on the same queue.
> > > 
> > > For my system with 96 cpus and 16 queues, blk_mq_map_queues() would map
> > > queue 0 to cpu 0, 16, 32, 48 ..., queue 1 to cpu 1, 17, 33 and so on.
> > 
> 
> Hi Ming,
> 
> > blk_mq_map_queues() is the default or fallback mapping in case that managed
> > irq isn't used. If the mapping isn't good enough, we still can improve it
> > in future, then any driver applying it can got improved.
> > 
> 
> That's the right attitude. However, as I see, we can only know the mapping
> when we know the interrupt affinity or some other mapping restriction or
> rule etc, which we don't know in this case.
> 
> For now, personally I would rather if we only expose multiple queues for
> when auto_affine_msi_experimental is set. I fear that we may make a
> performance regression for !auto_affine_msi_experimental with this patch. We
> would need to test.

I suggest using the generic blk-mq helper.

The default queue mapping of blk_mq_map_queues() has been in use for a
while and has worked well so far; for example, a very similar approach is
applied in megaraid_sas and mpt3sas, see _base_assign_reply_queues() and
megasas_setup_reply_map().

If it causes a performance drop, just report it and we can fix it.
Or you can even write a new .map_queues method just for hisi_sas v3.


Thanks,
Ming
diff mbox series

Patch

diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
index fc87994b5d73..3d48848dbde7 100644
--- a/drivers/scsi/hisi_sas/hisi_sas.h
+++ b/drivers/scsi/hisi_sas/hisi_sas.h
@@ -26,6 +26,7 @@ 
 #include <linux/platform_device.h>
 #include <linux/property.h>
 #include <linux/regmap.h>
+#include <linux/blk-mq-pci.h>
 #include <scsi/sas_ata.h>
 #include <scsi/libsas.h>
 
@@ -378,7 +379,6 @@  struct hisi_hba {
 	u32 intr_coal_count;	/* Interrupt count to coalesce */
 
 	int cq_nvecs;
-	unsigned int *reply_map;
 
 	/* debugfs memories */
 	u32 *debugfs_global_reg;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 8a7feb8ed8d6..a1c1f30b9fdb 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -441,6 +441,19 @@  static int hisi_sas_dif_dma_map(struct hisi_hba *hisi_hba,
 	return rc;
 }
 
+static struct scsi_cmnd *sas_task_to_scsi_cmd(struct sas_task *task)
+{
+	if (!task->uldd_task)
+		return NULL;
+
+	if (dev_is_sata(task->dev)) {
+		struct ata_queued_cmd *qc = task->uldd_task;
+		return qc->scsicmd;
+	} else {
+		return task->uldd_task;
+	}
+}
+
 static int hisi_sas_task_prep(struct sas_task *task,
 			      struct hisi_sas_dq **dq_pointer,
 			      bool is_tmf, struct hisi_sas_tmf_task *tmf,
@@ -459,6 +472,7 @@  static int hisi_sas_task_prep(struct sas_task *task,
 	struct hisi_sas_dq *dq;
 	unsigned long flags;
 	int wr_q_index;
+	struct scsi_cmnd *scsi_cmnd;
 
 	if (DEV_IS_GONE(sas_dev)) {
 		if (sas_dev)
@@ -471,9 +485,10 @@  static int hisi_sas_task_prep(struct sas_task *task,
 		return -ECOMM;
 	}
 
-	if (hisi_hba->reply_map) {
-		int cpu = raw_smp_processor_id();
-		unsigned int dq_index = hisi_hba->reply_map[cpu];
+	scsi_cmnd = sas_task_to_scsi_cmd(task);
+	if (hisi_hba->shost->hostt->host_tagset) {
+		unsigned int dq_index = scsi_cmnd_hctx_index(
+				hisi_hba->shost, scsi_cmnd);
 
 		*dq_pointer = dq = &hisi_hba->dq[dq_index];
 	} else {
@@ -503,21 +518,8 @@  static int hisi_sas_task_prep(struct sas_task *task,
 
 	if (hisi_hba->hw->slot_index_alloc)
 		rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device);
-	else {
-		struct scsi_cmnd *scsi_cmnd = NULL;
-
-		if (task->uldd_task) {
-			struct ata_queued_cmd *qc;
-
-			if (dev_is_sata(device)) {
-				qc = task->uldd_task;
-				scsi_cmnd = qc->scsicmd;
-			} else {
-				scsi_cmnd = task->uldd_task;
-			}
-		}
+	else
 		rc  = hisi_sas_slot_index_alloc(hisi_hba, scsi_cmnd);
-	}
 	if (rc < 0)
 		goto err_out_dif_dma_unmap;
 
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index 49620c2411df..063e50e5b30c 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -2344,30 +2344,6 @@  static irqreturn_t cq_interrupt_v3_hw(int irq_no, void *p)
 	return IRQ_HANDLED;
 }
 
-static void setup_reply_map_v3_hw(struct hisi_hba *hisi_hba, int nvecs)
-{
-	const struct cpumask *mask;
-	int queue, cpu;
-
-	for (queue = 0; queue < nvecs; queue++) {
-		struct hisi_sas_cq *cq = &hisi_hba->cq[queue];
-
-		mask = pci_irq_get_affinity(hisi_hba->pci_dev, queue +
-					    BASE_VECTORS_V3_HW);
-		if (!mask)
-			goto fallback;
-		cq->pci_irq_mask = mask;
-		for_each_cpu(cpu, mask)
-			hisi_hba->reply_map[cpu] = queue;
-	}
-	return;
-
-fallback:
-	for_each_possible_cpu(cpu)
-		hisi_hba->reply_map[cpu] = cpu % hisi_hba->queue_count;
-	/* Don't clean all CQ masks */
-}
-
 static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
 {
 	struct device *dev = hisi_hba->dev;
@@ -2383,11 +2359,6 @@  static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
 
 		min_msi = MIN_AFFINE_VECTORS_V3_HW;
 
-		hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids,
-						   sizeof(unsigned int),
-						   GFP_KERNEL);
-		if (!hisi_hba->reply_map)
-			return -ENOMEM;
 		vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev,
 							 min_msi, max_msi,
 							 PCI_IRQ_MSI |
@@ -2395,7 +2366,6 @@  static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
 							 &desc);
 		if (vectors < 0)
 			return -ENOENT;
-		setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW);
 	} else {
 		min_msi = max_msi;
 		vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi,
@@ -2896,6 +2866,18 @@  static void debugfs_snapshot_restore_v3_hw(struct hisi_hba *hisi_hba)
 	clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
 }
 
+static int hisi_sas_map_queues(struct Scsi_Host *shost)
+{
+	struct hisi_hba *hisi_hba = shost_priv(shost);
+	struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
+
+	if (auto_affine_msi_experimental)
+		return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev,
+				BASE_VECTORS_V3_HW);
+	else
+		return blk_mq_map_queues(qmap);
+}
+
 static struct scsi_host_template sht_v3_hw = {
 	.name			= DRV_NAME,
 	.module			= THIS_MODULE,
@@ -2906,6 +2888,8 @@  static struct scsi_host_template sht_v3_hw = {
 	.scan_start		= hisi_sas_scan_start,
 	.change_queue_depth	= sas_change_queue_depth,
 	.bios_param		= sas_bios_param,
+	.map_queues		= hisi_sas_map_queues,
+	.host_tagset		= 1,
 	.this_id		= -1,
 	.sg_tablesize		= HISI_SAS_SGE_PAGE_CNT,
 	.sg_prot_tablesize	= HISI_SAS_SGE_PAGE_CNT,
@@ -3092,6 +3076,8 @@  hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (hisi_sas_debugfs_enable)
 		hisi_sas_debugfs_init(hisi_hba);
 
+	shost->nr_hw_queues = hisi_hba->cq_nvecs;
+
 	rc = scsi_add_host(shost, dev);
 	if (rc)
 		goto err_out_ha;