diff mbox series

[v2,1/6] blk-mq: introduce blk_mq_hctx_map_queues

Message ID 20241111-refactor-blk-affinity-helpers-v2-1-f360ddad231a@kernel.org (mailing list archive)
State Superseded
Delegated to: Krzysztof WilczyƄski
Headers show
Series blk: refactor queue affinity helpers | expand

Commit Message

Daniel Wagner Nov. 11, 2024, 6:02 p.m. UTC
blk_mq_pci_map_queues and blk_mq_virtio_map_queues will create a CPU to
hardware queue mapping based on affinity information. These two function
share common code and only differ on how the affinity information is
retrieved. Also, those functions are located in the block subsystem
where it doesn't really fit in. They are virtio and pci subsystem
specific.

Introduce a new callback in struct bus_type to get the affinity mask.
The callbacks can then be populated by the subsystem directly.

All but one driver use the subsystem default affinity masks. hisi_sas v2
depends on a driver specific mapping, thus use the optional argument
get_queue_affinity to retrieve the mapping.

Original-by : Ming Lei <ming.lei@redhat.com>
Signed-off-by: Daniel Wagner <wagi@kernel.org>
---
 block/blk-mq-cpumap.c      | 40 ++++++++++++++++++++++++++++++++++++++++
 drivers/pci/pci-driver.c   | 16 ++++++++++++++++
 drivers/virtio/virtio.c    | 12 ++++++++++++
 include/linux/blk-mq.h     |  5 +++++
 include/linux/device/bus.h |  3 +++
 5 files changed, 76 insertions(+)

Comments

Bjorn Helgaas Nov. 11, 2024, 7 p.m. UTC | #1
On Mon, Nov 11, 2024 at 07:02:09PM +0100, Daniel Wagner wrote:
> blk_mq_pci_map_queues and blk_mq_virtio_map_queues will create a CPU to
> hardware queue mapping based on affinity information. These two function
> share common code and only differ on how the affinity information is
> retrieved. Also, those functions are located in the block subsystem
> where it doesn't really fit in. They are virtio and pci subsystem
> specific.
> 
> Introduce a new callback in struct bus_type to get the affinity mask.
> The callbacks can then be populated by the subsystem directly.
> 
> All but one driver use the subsystem default affinity masks. hisi_sas v2
> depends on a driver specific mapping, thus use the optional argument
> get_queue_affinity to retrieve the mapping.
> 
> Original-by : Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Daniel Wagner <wagi@kernel.org>
> ---
>  block/blk-mq-cpumap.c      | 40 ++++++++++++++++++++++++++++++++++++++++
>  drivers/pci/pci-driver.c   | 16 ++++++++++++++++
>  drivers/virtio/virtio.c    | 12 ++++++++++++
>  include/linux/blk-mq.h     |  5 +++++
>  include/linux/device/bus.h |  3 +++
>  5 files changed, 76 insertions(+)
> 
> diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
> index 9638b25fd52124f0173e968ebdca5f1fe0b42ad9..4dd703f5ee647fd1ba0b14ca11ddfdefa98a9a25 100644
> --- a/block/blk-mq-cpumap.c
> +++ b/block/blk-mq-cpumap.c
> @@ -54,3 +54,43 @@ int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index)
>  
>  	return NUMA_NO_NODE;
>  }
> +
> +/**
> + * blk_mq_hctx_map_queues - Create CPU to hardware queue mapping
> + * @qmap:	CPU to hardware queue map.
> + * @dev:	The device to map queues.
> + * @offset:	Queue offset to use for the device.
> + * @get_irq_affinity:	Optional callback to retrieve queue affinity.
> + *
> + * Create a CPU to hardware queue mapping in @qmap. For each queue
> + * @get_queue_affinity will be called. If @get_queue_affinity is not
> + * provided, then the bus_type irq_get_affinity callback will be
> + * used to retrieve the affinity.
> + */
> +void blk_mq_hctx_map_queues(struct blk_mq_queue_map *qmap,
> +			    struct device *dev, unsigned int offset,
> +			    get_queue_affinity_fn *get_irq_affinity)
> +{
> +	const struct cpumask *mask = NULL;
> +	unsigned int queue, cpu;
> +
> +	for (queue = 0; queue < qmap->nr_queues; queue++) {
> +		if (get_irq_affinity)
> +			mask = get_irq_affinity(dev, queue + offset);
> +		else if (dev->bus->irq_get_affinity)
> +			mask = dev->bus->irq_get_affinity(dev, queue + offset);
> +
> +		if (!mask)
> +			goto fallback;
> +
> +		for_each_cpu(cpu, mask)
> +			qmap->mq_map[cpu] = qmap->queue_offset + queue;
> +	}
> +
> +	return;
> +
> +fallback:
> +	WARN_ON_ONCE(qmap->nr_queues > 1);
> +	blk_mq_clear_mq_map(qmap);
> +}
> +EXPORT_SYMBOL_GPL(blk_mq_hctx_map_queues);
> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> index 35270172c833186995aebdda6f95ab3ffd7c67a0..59e5f430a380285162a87bd1a9b392bba8066450 100644
> --- a/drivers/pci/pci-driver.c
> +++ b/drivers/pci/pci-driver.c
> @@ -1670,6 +1670,21 @@ static void pci_dma_cleanup(struct device *dev)
>  		iommu_device_unuse_default_domain(dev);
>  }
>  
> +/**
> + * pci_device_irq_get_affinity - get affinity mask queue mapping for PCI device
> + * @dev: ptr to dev structure
> + * @irq_vec: interrupt vector number
> + *
> + * This function returns for a queue the affinity mask for a PCI device.

From the PCI core perspective, this is only the *IRQ* affinity mask.
The queue connection is not relevant here and probably confusing.

  ... - get IRQ affinity mask for device

  Return the CPU affinity mask for @dev and @irq_vec.

With the above changes,

Acked-by: Bjorn Helgaas <bhelgaas@google.com>

> + */
> +static const struct cpumask *pci_device_irq_get_affinity(struct device *dev,
> +					unsigned int irq_vec)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +
> +	return pci_irq_get_affinity(pdev, irq_vec);
> +}
> +
>  const struct bus_type pci_bus_type = {
>  	.name		= "pci",
>  	.match		= pci_bus_match,
> @@ -1677,6 +1692,7 @@ const struct bus_type pci_bus_type = {
>  	.probe		= pci_device_probe,
>  	.remove		= pci_device_remove,
>  	.shutdown	= pci_device_shutdown,
> +	.irq_get_affinity = pci_device_irq_get_affinity,
>  	.dev_groups	= pci_dev_groups,
>  	.bus_groups	= pci_bus_groups,
>  	.drv_groups	= pci_drv_groups,
> diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> index b9095751e43bb7db5fc991b0cc0979d2e86f7b9b..86390db7e74befa17c9fa146ab6b454bbae3b7f5 100644
> --- a/drivers/virtio/virtio.c
> +++ b/drivers/virtio/virtio.c
> @@ -377,6 +377,17 @@ static void virtio_dev_remove(struct device *_d)
>  	of_node_put(dev->dev.of_node);
>  }
>  
> +static const struct cpumask *virtio_irq_get_affinity(struct device *_d,
> +						     unsigned int irq_veq)
> +{
> +	struct virtio_device *dev = dev_to_virtio(_d);
> +
> +	if (!dev->config->get_vq_affinity)
> +		return NULL;
> +
> +	return dev->config->get_vq_affinity(dev, irq_veq);
> +}
> +
>  static const struct bus_type virtio_bus = {
>  	.name  = "virtio",
>  	.match = virtio_dev_match,
> @@ -384,6 +395,7 @@ static const struct bus_type virtio_bus = {
>  	.uevent = virtio_uevent,
>  	.probe = virtio_dev_probe,
>  	.remove = virtio_dev_remove,
> +	.irq_get_affinity = virtio_irq_get_affinity,
>  };
>  
>  int __register_virtio_driver(struct virtio_driver *driver, struct module *owner)
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 2035fad3131fb60781957095ce8a3a941dd104be..6b40af77bf44afa7112d274b731b591f2a67d68c 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -922,7 +922,12 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
>  void blk_mq_unfreeze_queue_non_owner(struct request_queue *q);
>  void blk_freeze_queue_start_non_owner(struct request_queue *q);
>  
> +typedef const struct cpumask *(get_queue_affinity_fn)(struct device *dev,
> +						      unsigned int queue);
>  void blk_mq_map_queues(struct blk_mq_queue_map *qmap);
> +void blk_mq_hctx_map_queues(struct blk_mq_queue_map *qmap,
> +			    struct device *dev, unsigned int offset,
> +			    get_queue_affinity_fn *get_queue_affinity);
>  void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
>  
>  void blk_mq_quiesce_queue_nowait(struct request_queue *q);
> diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
> index cdc4757217f9bb4b36b5c3b8a48bab45737e44c5..b18658bce2c3819fc1cbeb38fb98391d56ec3317 100644
> --- a/include/linux/device/bus.h
> +++ b/include/linux/device/bus.h
> @@ -48,6 +48,7 @@ struct fwnode_handle;
>   *		will never get called until they do.
>   * @remove:	Called when a device removed from this bus.
>   * @shutdown:	Called at shut-down time to quiesce the device.
> + * @irq_get_affinity:	Get IRQ affinity mask for the device on this bus.
>   *
>   * @online:	Called to put the device back online (after offlining it).
>   * @offline:	Called to put the device offline for hot-removal. May fail.
> @@ -87,6 +88,8 @@ struct bus_type {
>  	void (*sync_state)(struct device *dev);
>  	void (*remove)(struct device *dev);
>  	void (*shutdown)(struct device *dev);
> +	const struct cpumask *(*irq_get_affinity)(struct device *dev,
> +			unsigned int irq_vec);
>  
>  	int (*online)(struct device *dev);
>  	int (*offline)(struct device *dev);
> 
> -- 
> 2.47.0
>
Christoph Hellwig Nov. 12, 2024, 4:47 a.m. UTC | #2
On Mon, Nov 11, 2024 at 07:02:09PM +0100, Daniel Wagner wrote:
> blk_mq_pci_map_queues and blk_mq_virtio_map_queues will create a CPU to
> hardware queue mapping based on affinity information. These two function
> share common code and only differ on how the affinity information is
> retrieved. Also, those functions are located in the block subsystem
> where it doesn't really fit in. They are virtio and pci subsystem
> specific.
> 
> Introduce a new callback in struct bus_type to get the affinity mask.
> The callbacks can then be populated by the subsystem directly.
> 
> All but one driver use the subsystem default affinity masks. hisi_sas v2
> depends on a driver specific mapping, thus use the optional argument
> get_queue_affinity to retrieve the mapping.

This seems to mix up a few different things:

 1) adding a new bus operation
 2) implementations of that operation for PCI and virtio
 3) a block layer consumer of the operation

all these really should be separate per-subsystem patches.

You'll also need to Cc the driver model maintainers.

> +void blk_mq_hctx_map_queues(struct blk_mq_queue_map *qmap,
> +			    struct device *dev, unsigned int offset,
> +			    get_queue_affinity_fn *get_irq_affinity)
> +{
> +	const struct cpumask *mask = NULL;
> +	unsigned int queue, cpu;
> +
> +	for (queue = 0; queue < qmap->nr_queues; queue++) {
> +		if (get_irq_affinity)
> +			mask = get_irq_affinity(dev, queue + offset);
> +		else if (dev->bus->irq_get_affinity)
> +			mask = dev->bus->irq_get_affinity(dev, queue + offset);
> +
> +		if (!mask)
> +			goto fallback;
> +
> +		for_each_cpu(cpu, mask)
> +			qmap->mq_map[cpu] = qmap->queue_offset + queue;
> +	}

This does different things with a NULL argument vs not.  Please split it
into two separate helpers.  The non-NULL case is only uses in hisi_sas,
so it might be worth just open coding it there as well.

> +static const struct cpumask *pci_device_irq_get_affinity(struct device *dev,
> +					unsigned int irq_vec)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +
> +	return pci_irq_get_affinity(pdev, irq_vec);

Nit: this could be shortened to:

	return pci_irq_get_affinity(to_pci_dev(dev), irq_vec);
Daniel Wagner Nov. 12, 2024, 7:08 a.m. UTC | #3
On Tue, Nov 12, 2024 at 05:47:36AM +0100, Christoph Hellwig wrote:
> This seems to mix up a few different things:
> 
>  1) adding a new bus operation
>  2) implementations of that operation for PCI and virtio
>  3) a block layer consumer of the operation
> 
> all these really should be separate per-subsystem patches.
> 
> You'll also need to Cc the driver model maintainers.

Ok, will do.

> > +void blk_mq_hctx_map_queues(struct blk_mq_queue_map *qmap,
> > +			    struct device *dev, unsigned int offset,
> > +			    get_queue_affinity_fn *get_irq_affinity)
> > +{
> > +	const struct cpumask *mask = NULL;
> > +	unsigned int queue, cpu;
> > +
> > +	for (queue = 0; queue < qmap->nr_queues; queue++) {
> > +		if (get_irq_affinity)
> > +			mask = get_irq_affinity(dev, queue + offset);
> > +		else if (dev->bus->irq_get_affinity)
> > +			mask = dev->bus->irq_get_affinity(dev, queue + offset);
> > +
> > +		if (!mask)
> > +			goto fallback;
> > +
> > +		for_each_cpu(cpu, mask)
> > +			qmap->mq_map[cpu] = qmap->queue_offset + queue;
> > +	}
> 
> This does different things with a NULL argument vs not.  Please split it
> into two separate helpers.  The non-NULL case is only uses in hisi_sas,
> so it might be worth just open coding it there as well.

I'd like to avoid the open coding case, because this will make the
following patches to support the isolated cpu  patches more complex.
Having a central place where the all the mask operation are, makes it a
bit simpler streamlined. But then I could open code that part as well.

> > +static const struct cpumask *pci_device_irq_get_affinity(struct device *dev,
> > +					unsigned int irq_vec)
> > +{
> > +	struct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +	return pci_irq_get_affinity(pdev, irq_vec);
> 
> Nit: this could be shortened to:
> 
> 	return pci_irq_get_affinity(to_pci_dev(dev), irq_vec);

Ok.

Thanks,
Daniel
diff mbox series

Patch

diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 9638b25fd52124f0173e968ebdca5f1fe0b42ad9..4dd703f5ee647fd1ba0b14ca11ddfdefa98a9a25 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -54,3 +54,43 @@  int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index)
 
 	return NUMA_NO_NODE;
 }
+
+/**
+ * blk_mq_hctx_map_queues - Create CPU to hardware queue mapping
+ * @qmap:	CPU to hardware queue map.
+ * @dev:	The device to map queues.
+ * @offset:	Queue offset to use for the device.
+ * @get_irq_affinity:	Optional callback to retrieve queue affinity.
+ *
+ * Create a CPU to hardware queue mapping in @qmap. For each queue
+ * @get_queue_affinity will be called. If @get_queue_affinity is not
+ * provided, then the bus_type irq_get_affinity callback will be
+ * used to retrieve the affinity.
+ */
+void blk_mq_hctx_map_queues(struct blk_mq_queue_map *qmap,
+			    struct device *dev, unsigned int offset,
+			    get_queue_affinity_fn *get_irq_affinity)
+{
+	const struct cpumask *mask = NULL;
+	unsigned int queue, cpu;
+
+	for (queue = 0; queue < qmap->nr_queues; queue++) {
+		if (get_irq_affinity)
+			mask = get_irq_affinity(dev, queue + offset);
+		else if (dev->bus->irq_get_affinity)
+			mask = dev->bus->irq_get_affinity(dev, queue + offset);
+
+		if (!mask)
+			goto fallback;
+
+		for_each_cpu(cpu, mask)
+			qmap->mq_map[cpu] = qmap->queue_offset + queue;
+	}
+
+	return;
+
+fallback:
+	WARN_ON_ONCE(qmap->nr_queues > 1);
+	blk_mq_clear_mq_map(qmap);
+}
+EXPORT_SYMBOL_GPL(blk_mq_hctx_map_queues);
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 35270172c833186995aebdda6f95ab3ffd7c67a0..59e5f430a380285162a87bd1a9b392bba8066450 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1670,6 +1670,21 @@  static void pci_dma_cleanup(struct device *dev)
 		iommu_device_unuse_default_domain(dev);
 }
 
+/**
+ * pci_device_irq_get_affinity - get affinity mask queue mapping for PCI device
+ * @dev: ptr to dev structure
+ * @irq_vec: interrupt vector number
+ *
+ * This function returns for a queue the affinity mask for a PCI device.
+ */
+static const struct cpumask *pci_device_irq_get_affinity(struct device *dev,
+					unsigned int irq_vec)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return pci_irq_get_affinity(pdev, irq_vec);
+}
+
 const struct bus_type pci_bus_type = {
 	.name		= "pci",
 	.match		= pci_bus_match,
@@ -1677,6 +1692,7 @@  const struct bus_type pci_bus_type = {
 	.probe		= pci_device_probe,
 	.remove		= pci_device_remove,
 	.shutdown	= pci_device_shutdown,
+	.irq_get_affinity = pci_device_irq_get_affinity,
 	.dev_groups	= pci_dev_groups,
 	.bus_groups	= pci_bus_groups,
 	.drv_groups	= pci_drv_groups,
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index b9095751e43bb7db5fc991b0cc0979d2e86f7b9b..86390db7e74befa17c9fa146ab6b454bbae3b7f5 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -377,6 +377,17 @@  static void virtio_dev_remove(struct device *_d)
 	of_node_put(dev->dev.of_node);
 }
 
+static const struct cpumask *virtio_irq_get_affinity(struct device *_d,
+						     unsigned int irq_veq)
+{
+	struct virtio_device *dev = dev_to_virtio(_d);
+
+	if (!dev->config->get_vq_affinity)
+		return NULL;
+
+	return dev->config->get_vq_affinity(dev, irq_veq);
+}
+
 static const struct bus_type virtio_bus = {
 	.name  = "virtio",
 	.match = virtio_dev_match,
@@ -384,6 +395,7 @@  static const struct bus_type virtio_bus = {
 	.uevent = virtio_uevent,
 	.probe = virtio_dev_probe,
 	.remove = virtio_dev_remove,
+	.irq_get_affinity = virtio_irq_get_affinity,
 };
 
 int __register_virtio_driver(struct virtio_driver *driver, struct module *owner)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2035fad3131fb60781957095ce8a3a941dd104be..6b40af77bf44afa7112d274b731b591f2a67d68c 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -922,7 +922,12 @@  int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
 void blk_mq_unfreeze_queue_non_owner(struct request_queue *q);
 void blk_freeze_queue_start_non_owner(struct request_queue *q);
 
+typedef const struct cpumask *(get_queue_affinity_fn)(struct device *dev,
+						      unsigned int queue);
 void blk_mq_map_queues(struct blk_mq_queue_map *qmap);
+void blk_mq_hctx_map_queues(struct blk_mq_queue_map *qmap,
+			    struct device *dev, unsigned int offset,
+			    get_queue_affinity_fn *get_queue_affinity);
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
 
 void blk_mq_quiesce_queue_nowait(struct request_queue *q);
diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index cdc4757217f9bb4b36b5c3b8a48bab45737e44c5..b18658bce2c3819fc1cbeb38fb98391d56ec3317 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -48,6 +48,7 @@  struct fwnode_handle;
  *		will never get called until they do.
  * @remove:	Called when a device removed from this bus.
  * @shutdown:	Called at shut-down time to quiesce the device.
+ * @irq_get_affinity:	Get IRQ affinity mask for the device on this bus.
  *
  * @online:	Called to put the device back online (after offlining it).
  * @offline:	Called to put the device offline for hot-removal. May fail.
@@ -87,6 +88,8 @@  struct bus_type {
 	void (*sync_state)(struct device *dev);
 	void (*remove)(struct device *dev);
 	void (*shutdown)(struct device *dev);
+	const struct cpumask *(*irq_get_affinity)(struct device *dev,
+			unsigned int irq_vec);
 
 	int (*online)(struct device *dev);
 	int (*offline)(struct device *dev);