@@ -351,6 +351,7 @@ struct napi_config {
u64 gro_flush_timeout;
u64 irq_suspend_timeout;
u32 defer_hard_irqs;
+ cpumask_t affinity_mask;
unsigned int napi_id;
};
@@ -392,8 +393,8 @@ struct napi_struct {
struct list_head dev_list;
struct hlist_node napi_hash_node;
int irq;
-#ifdef CONFIG_RFS_ACCEL
struct irq_affinity_notify notify;
+#ifdef CONFIG_RFS_ACCEL
int napi_rmap_idx;
#endif
int index;
@@ -2402,6 +2403,7 @@ struct net_device {
struct lock_class_key *qdisc_tx_busylock;
bool proto_down;
bool threaded;
+ bool irq_affinity_auto;
#ifdef CONFIG_RFS_ACCEL
bool rx_cpu_rmap_auto;
#endif
@@ -2637,6 +2639,11 @@ static inline void netdev_set_ml_priv(struct net_device *dev,
dev->ml_priv_type = type;
}
+static inline void netif_enable_irq_affinity(struct net_device *dev)
+{
+ dev->irq_affinity_auto = true; /* opt in: core code checks this flag before saving/restoring a NAPI's IRQ affinity mask */
+}
+
/*
* Net namespace inlines
*/
@@ -6736,22 +6736,30 @@ int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs)
return 0;
}
EXPORT_SYMBOL(netif_enable_cpu_rmap);
+#endif
static void
-netif_irq_cpu_rmap_notify(struct irq_affinity_notify *notify,
- const cpumask_t *mask)
+netif_napi_irq_notify(struct irq_affinity_notify *notify, /* installed as notify.notify by both the rmap path and the irq_affinity_auto path */
+ const cpumask_t *mask)
{
struct napi_struct *napi =
container_of(notify, struct napi_struct, notify);
+#ifdef CONFIG_RFS_ACCEL /* rmap locals are only used by the RFS_ACCEL section below */
struct cpu_rmap *rmap = napi->dev->rx_cpu_rmap;
int err;
+#endif /* CONFIG_RFS_ACCEL */
+ if (napi->config && napi->dev->irq_affinity_auto)
+ cpumask_copy(&napi->config->affinity_mask, mask); /* saved so napi_restore_config() can re-apply it with irq_set_affinity() */
+
+#ifdef CONFIG_RFS_ACCEL /* napi_rmap_idx and rx_cpu_rmap_auto only exist under this option */
if (rmap && napi->dev->rx_cpu_rmap_auto) {
err = cpu_rmap_update(rmap, napi->napi_rmap_idx, mask);
if (err)
pr_warn("%s: RMAP update failed (%d)\n",
__func__, err);
}
+#endif /* CONFIG_RFS_ACCEL */
}
static void
@@ -6759,6 +6767,7 @@ netif_napi_affinity_release(struct kref __always_unused *ref)
{
}
+#ifdef CONFIG_RFS_ACCEL
static int napi_irq_cpu_rmap_add(struct napi_struct *napi, int irq)
{
struct cpu_rmap *rmap = napi->dev->rx_cpu_rmap;
@@ -6766,7 +6775,7 @@ static int napi_irq_cpu_rmap_add(struct napi_struct *napi, int irq)
if (!napi || !rmap)
return -EINVAL;
- napi->notify.notify = netif_irq_cpu_rmap_notify;
+ napi->notify.notify = netif_napi_irq_notify;
napi->notify.release = netif_napi_affinity_release;
cpu_rmap_get(rmap);
rc = cpu_rmap_add(rmap, napi);
@@ -6790,9 +6799,8 @@ static int napi_irq_cpu_rmap_add(struct napi_struct *napi, int irq)
void netif_napi_set_irq(struct napi_struct *napi, int irq)
{
-#ifdef CONFIG_RFS_ACCEL
int rc;
-#endif
+
napi->irq = irq;
#ifdef CONFIG_RFS_ACCEL
@@ -6803,8 +6811,18 @@ void netif_napi_set_irq(struct napi_struct *napi, int irq)
rc);
netif_disable_cpu_rmap(napi->dev);
}
- }
+ } else if (irq > 0 && napi->config && napi->dev->irq_affinity_auto) {
+#else
+ if (irq > 0 && napi->config && napi->dev->irq_affinity_auto) {
#endif
+ napi->notify.notify = netif_napi_irq_notify;
+ napi->notify.release = netif_napi_affinity_release;
+
+ rc = irq_set_affinity_notifier(irq, &napi->notify);
+ if (rc)
+ netdev_warn(napi->dev, "Unable to set IRQ notifier (%d)\n",
+ rc);
+ }
}
EXPORT_SYMBOL(netif_napi_set_irq);
@@ -6813,6 +6831,10 @@ static void napi_restore_config(struct napi_struct *n)
n->defer_hard_irqs = n->config->defer_hard_irqs;
n->gro_flush_timeout = n->config->gro_flush_timeout;
n->irq_suspend_timeout = n->config->irq_suspend_timeout;
+
+ if (n->irq > 0 && n->config && n->dev->irq_affinity_auto)
+ irq_set_affinity(n->irq, &n->config->affinity_mask);
+
/* a NAPI ID might be stored in the config, if so use it. if not, use
* napi_hash_add to generate one for us. It will be saved to the config
* in napi_disable.
@@ -6829,6 +6851,10 @@ static void napi_save_config(struct napi_struct *n)
n->config->gro_flush_timeout = n->gro_flush_timeout;
n->config->irq_suspend_timeout = n->irq_suspend_timeout;
n->config->napi_id = n->napi_id;
+
+ if (n->irq > 0 && n->config && n->dev->irq_affinity_auto)
+ irq_set_affinity_notifier(n->irq, NULL);
+
napi_hash_del(n);
}
@@ -11293,7 +11319,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
{
struct net_device *dev;
size_t napi_config_sz;
- unsigned int maxqs;
+ unsigned int maxqs, i, numa;
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -11389,6 +11415,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT);
if (!dev->napi_config)
goto free_all;
+ numa = dev_to_node(&dev->dev);
+ for (i = 0; i < maxqs; i++)
+ cpumask_set_cpu(cpumask_local_spread(i, numa),
+ &dev->napi_config[i].affinity_mask);
strscpy(dev->name, name);
dev->name_assign_type = name_assign_type;
A common task for most drivers is to remember the user-set CPU affinity of their IRQs. On each netdev reset, the driver should re-assign the user's settings to the IRQs. Add a CPU affinity mask to napi_config. To delegate the CPU affinity management to the core, drivers must: 1 - set the new netdev flag "irq_affinity_auto": netif_enable_irq_affinity(netdev) 2 - create the napi with persistent config: netif_napi_add_config() 3 - bind an IRQ to the napi instance: netif_napi_set_irq() The core will then make sure to re-assign the affinity to the napi's IRQ. The default IRQ mask is set to one CPU, starting from the closest NUMA node. Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com> --- include/linux/netdevice.h | 9 +++++++- net/core/dev.c | 44 ++++++++++++++++++++++++++++++++------- 2 files changed, 45 insertions(+), 8 deletions(-)