@@ -278,6 +278,8 @@ extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
extern int
irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
+struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs);
+
#else /* CONFIG_SMP */
static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
@@ -308,6 +310,12 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
{
return 0;
}
+
+static inline struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
+{
+ *nr_vecs = 1;
+ return NULL;
+}
#endif /* CONFIG_SMP */
/*
@@ -9,3 +9,4 @@ obj-$(CONFIG_GENERIC_IRQ_MIGRATION) += cpuhotplug.o
obj-$(CONFIG_PM_SLEEP) += pm.o
obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o
obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o
+obj-$(CONFIG_SMP) += affinity.o
new file mode 100644
@@ -0,0 +1,61 @@
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+
+static int get_first_sibling(unsigned int cpu)
+{
+ unsigned int ret;
+
+ ret = cpumask_first(topology_sibling_cpumask(cpu));
+ if (ret < nr_cpu_ids)
+ return ret;
+ return cpu;
+}
+
+/*
+ * Take a map of online CPUs and the number of available interrupt vectors
+ * and generate an output cpumask suitable for spreading MSI/MSI-X vectors
+ * so that they are distributed as good as possible around the CPUs. If
+ * more vectors than CPUs are available we'll map one to each CPU,
+ * otherwise we map one to the first sibling of each socket.
+ *
+ * If there are more vectors than CPUs we will still only have one bit
+ * set per CPU, but interrupt code will keep on assigning the vectors from
+ * the start of the bitmap until we run out of vectors.
+ */
+struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
+{
+ struct cpumask *affinity_mask;
+ unsigned int max_vecs = *nr_vecs;
+
+ if (max_vecs == 1)
+ return NULL;
+
+ affinity_mask = kzalloc(cpumask_size(), GFP_KERNEL);
+ if (!affinity_mask) {
+ *nr_vecs = 1;
+ return NULL;
+ }
+
+ if (max_vecs >= num_online_cpus()) {
+ cpumask_copy(affinity_mask, cpu_online_mask);
+ *nr_vecs = num_online_cpus();
+ } else {
+ unsigned int vecs = 0, cpu;
+
+ for_each_online_cpu(cpu) {
+ if (cpu == get_first_sibling(cpu)) {
+ cpumask_set_cpu(cpu, affinity_mask);
+ vecs++;
+ }
+
+ if (--max_vecs == 0)
+ break;
+ }
+ *nr_vecs = vecs;
+ }
+
+ return affinity_mask;
+}
This is lifted from the blk-mq code and adopted to use the affinity mask concept just introduced in the irq handling code. It tries to keep the algorithm the same as the one current used by blk-mq, but improvements like assining vectors on a per-node basis instead of just per sibling are possible on of this simple move and refactoring. Signed-off-by: Christoph Hellwig <hch@lst.de> --- include/linux/interrupt.h | 8 +++++++ kernel/irq/Makefile | 1 + kernel/irq/affinity.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) create mode 100644 kernel/irq/affinity.c