diff mbox series

[2/3] nvme-pci: limit queue count to housekeeping cpus

Message ID 20240621-isolcpus-io-queues-v1-2-8b169bf41083@suse.de (mailing list archive)
State New
Headers show
Series nvme-pci: honor isolcpus configuration | expand

Commit Message

Daniel Wagner June 21, 2024, 1:53 p.m. UTC
The nvme-pci driver ignores the isolcpus configuration and allocates IO
queues for all possible CPUs. This can add noise to an isolated CPU
whenever IO is issued on it, which is not always what the user wants.
Thus only ask for as many queues as there are housekeeping CPUs.

Note, the placement of the queues will be addressed in the next patch.

Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 block/blk-mq-cpumap.c   | 13 +++++++++++++
 drivers/nvme/host/pci.c |  4 ++--
 include/linux/blk-mq.h  |  1 +
 3 files changed, 16 insertions(+), 2 deletions(-)

Comments

Christoph Hellwig June 22, 2024, 5:14 a.m. UTC | #1
> diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
> index 9638b25fd521..43c039900ef6 100644
> --- a/block/blk-mq-cpumap.c
> +++ b/block/blk-mq-cpumap.c
> @@ -11,10 +11,23 @@
>  #include <linux/smp.h>
>  #include <linux/cpu.h>
>  #include <linux/group_cpus.h>
> +#include <linux/sched/isolation.h>
>  
>  #include "blk.h"
>  #include "blk-mq.h"
>  
> +unsigned int blk_mq_num_possible_queues(void)
> +{
> +	const struct cpumask *io_queue_mask;
> +
> +	io_queue_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
> +	if (!cpumask_empty(io_queue_mask))
> +		return cpumask_weight(io_queue_mask);
> +
> +	return num_possible_cpus();
> +}
> +EXPORT_SYMBOL_GPL(blk_mq_num_possible_queues);

This should be split into a separate patch.  And it could really use
a kerneldoc comment.

> -	return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues;
> +	return blk_mq_num_possible_queues() + dev->nr_write_queues + dev->nr_poll_queues;

Please avoid the overly long line here.

Otherwise this looks good to me.

Sagi Grimberg June 23, 2024, 7:03 a.m. UTC | #2
On 22/06/2024 8:14, Christoph Hellwig wrote:
>> diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
>> index 9638b25fd521..43c039900ef6 100644
>> --- a/block/blk-mq-cpumap.c
>> +++ b/block/blk-mq-cpumap.c
>> @@ -11,10 +11,23 @@
>>   #include <linux/smp.h>
>>   #include <linux/cpu.h>
>>   #include <linux/group_cpus.h>
>> +#include <linux/sched/isolation.h>
>>   
>>   #include "blk.h"
>>   #include "blk-mq.h"
>>   
>> +unsigned int blk_mq_num_possible_queues(void)
>> +{
>> +	const struct cpumask *io_queue_mask;
>> +
>> +	io_queue_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
>> +	if (!cpumask_empty(io_queue_mask))
>> +		return cpumask_weight(io_queue_mask);
>> +
>> +	return num_possible_cpus();
>> +}
>> +EXPORT_SYMBOL_GPL(blk_mq_num_possible_queues);
> This should be split into a separate patch.  And it could really use
> a kerneldoc comment.

Agree.

Other than that, looks good.

diff mbox series

Patch

diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 9638b25fd521..43c039900ef6 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -11,10 +11,23 @@ 
 #include <linux/smp.h>
 #include <linux/cpu.h>
 #include <linux/group_cpus.h>
+#include <linux/sched/isolation.h>
 
 #include "blk.h"
 #include "blk-mq.h"
 
+unsigned int blk_mq_num_possible_queues(void)
+{
+	const struct cpumask *io_queue_mask;
+
+	io_queue_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
+	if (!cpumask_empty(io_queue_mask))
+		return cpumask_weight(io_queue_mask);
+
+	return num_possible_cpus();
+}
+EXPORT_SYMBOL_GPL(blk_mq_num_possible_queues);
+
 void blk_mq_map_queues(struct blk_mq_queue_map *qmap)
 {
 	const struct cpumask *masks;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 102a9fb0c65f..66999fa13b2c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -81,7 +81,7 @@  static int io_queue_count_set(const char *val, const struct kernel_param *kp)
 	int ret;
 
 	ret = kstrtouint(val, 10, &n);
-	if (ret != 0 || n > num_possible_cpus())
+	if (ret != 0 || n > blk_mq_num_possible_queues())
 		return -EINVAL;
 	return param_set_uint(val, kp);
 }
@@ -2263,7 +2263,7 @@  static unsigned int nvme_max_io_queues(struct nvme_dev *dev)
 	 */
 	if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
 		return 1;
-	return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues;
+	return blk_mq_num_possible_queues() + dev->nr_write_queues + dev->nr_poll_queues;
 }
 
 static int nvme_setup_io_queues(struct nvme_dev *dev)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 89ba6b16fe8b..2105cc78ca67 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -900,6 +900,7 @@  void blk_mq_freeze_queue_wait(struct request_queue *q);
 int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
 				     unsigned long timeout);
 
+unsigned int blk_mq_num_possible_queues(void);
 void blk_mq_map_queues(struct blk_mq_queue_map *qmap);
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);