@@ -3238,6 +3238,15 @@
different yeeloong laptops.
Example: machtype=lemote-yeeloong-2f-7inch
+ managed_irqs_per_node=
+ [KNL,SMP] Limit the number of managed interrupts
+ allocated on each NUMA node, so that interrupt
+ allocation does not fail when a large number of
+ devices is present. 0 means no limit on the number
+ of managed irqs.
+ Format: integer between 0 and num_possible_cpus() / num_possible_nodes()
+ Default: 0
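+ Example: managed_irqs_per_node=8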
+
maxcpus= [SMP,EARLY] Maximum number of processors that an SMP kernel
will bring up during bootup. maxcpus=n : n >= 0 limits
the kernel to bring up 'n' processors. Surely after
@@ -20,7 +20,7 @@ void blk_mq_map_queues(struct blk_mq_queue_map *qmap)
const struct cpumask *masks;
unsigned int queue, cpu;
- masks = group_cpus_evenly(qmap->nr_queues);
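+ /* Treat the queue mapping groups as managed so the per-node limit applies */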
+ masks = group_cpus_evenly(qmap->nr_queues, true);
if (!masks) {
for_each_possible_cpu(cpu)
qmap->mq_map[cpu] = qmap->queue_offset;
@@ -330,7 +330,7 @@ create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
unsigned int this_vecs = affd->set_size[i];
int j;
- struct cpumask *result = group_cpus_evenly(this_vecs);
+ struct cpumask *result = group_cpus_evenly(this_vecs, true);
if (!result) {
kfree(masks);
@@ -881,7 +881,7 @@ static void virtio_fs_map_queues(struct virtio_device *vdev, struct virtio_fs *f
return;
fallback:
/* Attempt to map evenly in groups over the CPUs */
- masks = group_cpus_evenly(fs->num_request_queues);
+ masks = group_cpus_evenly(fs->num_request_queues, true);
/* If even this fails we default to all CPUs use queue zero */
if (!masks) {
for_each_possible_cpu(cpu)
@@ -9,6 +9,6 @@
#include <linux/kernel.h>
#include <linux/cpu.h>
-struct cpumask *group_cpus_evenly(unsigned int numgrps);
+struct cpumask *group_cpus_evenly(unsigned int numgrps, bool is_managed);
#endif
@@ -64,6 +64,10 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
for (curvec = 0; curvec < affd->pre_vectors; curvec++)
cpumask_copy(&masks[curvec].mask, irq_default_affinity);
+ /* Mark the managed interrupts first so the flag can be passed to group_cpus_evenly() */
+ for (i = curvec; i < nvecs - affd->post_vectors; i++)
+ masks[i].is_managed = 1;
+
/*
* Spread on present CPUs starting from affd->pre_vectors. If we
* have multiple sets, build each sets affinity mask separately.
@@ -71,7 +75,8 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
unsigned int this_vecs = affd->set_size[i];
int j;
- struct cpumask *result = group_cpus_evenly(this_vecs);
+ struct cpumask *result = group_cpus_evenly(this_vecs,
+ masks[curvec].is_managed);
if (!result) {
kfree(masks);
@@ -94,10 +99,6 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
for (; curvec < nvecs; curvec++)
cpumask_copy(&masks[curvec].mask, irq_default_affinity);
- /* Mark the managed interrupts */
- for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
- masks[i].is_managed = 1;
-
return masks;
}
@@ -11,6 +11,30 @@
#ifdef CONFIG_SMP
+static unsigned int __read_mostly managed_irqs_per_node;
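+/* Per-node tracking of CPUs not yet used for managed interrupts */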
+static struct cpumask managed_irqs_cpumsk[MAX_NUMNODES] __cacheline_aligned_in_smp = {
+ [0 ... MAX_NUMNODES-1] = {CPU_BITS_ALL}
+};
+
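+/* Parse managed_irqs_per_node= and clamp it to num_possible_cpus() / num_possible_nodes() */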
+static int __init irq_managed_setup(char *str)
+{
+ int ret;
+
+ ret = kstrtouint(str, 10, &managed_irqs_per_node);
+ if (ret < 0) {
+ pr_warn("managed_irqs_per_node= cannot parse, ignored\n");
+ return 0;
+ }
+
+ if (managed_irqs_per_node * num_possible_nodes() > num_possible_cpus()) {
+ managed_irqs_per_node = num_possible_cpus() / num_possible_nodes();
+ pr_warn("managed_irqs_per_node= cannot be larger than %u\n",
+ managed_irqs_per_node);
+ }
+ return 1;
+}
+__setup("managed_irqs_per_node=", irq_managed_setup);
+
static void grp_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
unsigned int cpus_per_grp)
{
@@ -246,6 +270,30 @@ static void alloc_nodes_groups(unsigned int numgrps,
}
}
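+/*
+ * Restrict @premask to at most managed_irqs_per_node CPUs per node. The
+ * per-node managed_irqs_cpumsk[] tracks which CPUs have not yet been used
+ * for managed interrupts and is refilled from the full node mask once it
+ * runs low.
+ */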
+static void __group_prepare_affinity(struct cpumask *premask,
+ cpumask_var_t *node_to_cpumask)
+{
+ nodemask_t nodemsk = NODE_MASK_NONE;
+ unsigned int ncpus, n;
+
+ get_nodes_in_cpumask(node_to_cpumask, premask, &nodemsk);
+
+ for_each_node_mask(n, nodemsk) {
+ cpumask_and(&managed_irqs_cpumsk[n], &managed_irqs_cpumsk[n], premask);
+ cpumask_and(&managed_irqs_cpumsk[n], &managed_irqs_cpumsk[n], node_to_cpumask[n]);
+
+ ncpus = cpumask_weight(&managed_irqs_cpumsk[n]);
+ if (ncpus < managed_irqs_per_node) {
+ /* Refill node n's tracking mask from the full node cpumask */
+ cpumask_copy(&managed_irqs_cpumsk[n], node_to_cpumask[n]);
+ continue;
+ }
+
+ /* Keep only managed_irqs_per_node of this node's CPUs in @premask */
+ cpumask_andnot(premask, premask, node_to_cpumask[n]);
+ grp_spread_init_one(premask, &managed_irqs_cpumsk[n], managed_irqs_per_node);
+ }
+}
+
static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps,
cpumask_var_t *node_to_cpumask,
const struct cpumask *cpu_mask,
@@ -332,6 +380,7 @@ static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps,
/**
* group_cpus_evenly - Group all CPUs evenly per NUMA/CPU locality
* @numgrps: number of groups
+ * @is_managed: true if the groups back kernel-managed interrupts
*
* Return: cpumask array if successful, NULL otherwise. And each element
* includes CPUs assigned to this group
@@ -344,7 +393,7 @@ static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps,
* We guarantee in the resulted grouping that all CPUs are covered, and
* no same CPU is assigned to multiple groups
*/
-struct cpumask *group_cpus_evenly(unsigned int numgrps)
+struct cpumask *group_cpus_evenly(unsigned int numgrps, bool is_managed)
{
unsigned int curgrp = 0, nr_present = 0, nr_others = 0;
cpumask_var_t *node_to_cpumask;
@@ -382,6 +431,10 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps)
*/
cpumask_copy(npresmsk, data_race(cpu_present_mask));
+ /* Limit the count of managed interrupts on every node */
+ if (is_managed && managed_irqs_per_node)
+ __group_prepare_affinity(npresmsk, node_to_cpumask);
+
/* grouping present CPUs first */
ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
npresmsk, nmsk, masks);