Message ID | 20250113171042.158123-2-ahmed.zaki@intel.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: napi: add CPU affinity to napi->config | expand |
> Add a new netdev flag "rx_cpu_rmap_auto". Drivers supporting ARFS should > set the flag via netif_enable_cpu_rmap() and core will allocate and manage > the ARFS rmap. Freeing the rmap is also done by core when the netdev is > freed. > > Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com> > --- > drivers/net/ethernet/amazon/ena/ena_netdev.c | 38 ++--------------- > drivers/net/ethernet/broadcom/bnxt/bnxt.c | 27 ++---------- > drivers/net/ethernet/intel/ice/ice_arfs.c | 17 +------- > include/linux/netdevice.h | 15 +++++-- > net/core/dev.c | 44 ++++++++++++++++++++ > 5 files changed, 63 insertions(+), 78 deletions(-) > > diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c > b/drivers/net/ethernet/amazon/ena/ena_netdev.c > index c1295dfad0d0..a3fceaa83cd5 100644 > --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c > +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c > @@ -5,9 +5,6 @@ > > #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > > -#ifdef CONFIG_RFS_ACCEL > -#include <linux/cpu_rmap.h> > -#endif /* CONFIG_RFS_ACCEL */ > #include <linux/ethtool.h> > #include <linux/kernel.h> > #include <linux/module.h> > @@ -165,25 +162,10 @@ int ena_xmit_common(struct ena_adapter > *adapter, static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) { > #ifdef CONFIG_RFS_ACCEL > - u32 i; > - int rc; > - > - adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter- > >num_io_queues); > - if (!adapter->netdev->rx_cpu_rmap) > - return -ENOMEM; > - for (i = 0; i < adapter->num_io_queues; i++) { > - int irq_idx = ENA_IO_IRQ_IDX(i); > - > - rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap, > - pci_irq_vector(adapter->pdev, irq_idx)); > - if (rc) { > - free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); > - adapter->netdev->rx_cpu_rmap = NULL; > - return rc; > - } > - } > -#endif /* CONFIG_RFS_ACCEL */ > + return netif_enable_cpu_rmap(adapter->netdev, > +adapter->num_io_queues); #else > return 0; > +#endif /* CONFIG_RFS_ACCEL */ > } > > static void ena_init_io_rings_common(struct 
ena_adapter *adapter, @@ - > 1742,13 +1724,6 @@ static void ena_free_io_irq(struct ena_adapter > *adapter) > struct ena_irq *irq; > int i; > > -#ifdef CONFIG_RFS_ACCEL > - if (adapter->msix_vecs >= 1) { > - free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); > - adapter->netdev->rx_cpu_rmap = NULL; > - } > -#endif /* CONFIG_RFS_ACCEL */ > - > for (i = ENA_IO_IRQ_FIRST_IDX; i < > ENA_MAX_MSIX_VEC(io_queue_count); i++) { > irq = &adapter->irq_tbl[i]; > irq_set_affinity_hint(irq->vector, NULL); @@ -4131,13 +4106,6 @@ > static void __ena_shutoff(struct pci_dev *pdev, bool shutdown) > ena_dev = adapter->ena_dev; > netdev = adapter->netdev; > > -#ifdef CONFIG_RFS_ACCEL > - if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) { > - free_irq_cpu_rmap(netdev->rx_cpu_rmap); > - netdev->rx_cpu_rmap = NULL; > - } > - > -#endif /* CONFIG_RFS_ACCEL */ > /* Make sure timer and reset routine won't be called after > * freeing device resources. > */ > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c > b/drivers/net/ethernet/broadcom/bnxt/bnxt.c > index 884d42db5554..1f50bc715038 100644 > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c > @@ -49,7 +49,6 @@ > #include <linux/cache.h> > #include <linux/log2.h> > #include <linux/bitmap.h> > -#include <linux/cpu_rmap.h> > #include <linux/cpumask.h> > #include <net/pkt_cls.h> > #include <net/page_pool/helpers.h> > @@ -10861,7 +10860,7 @@ static int bnxt_set_real_num_queues(struct bnxt > *bp) > > #ifdef CONFIG_RFS_ACCEL > if (bp->flags & BNXT_FLAG_RFS) > - dev->rx_cpu_rmap = alloc_irq_cpu_rmap(bp->rx_nr_rings); > + return netif_enable_cpu_rmap(dev, bp->rx_nr_rings); > #endif > > return rc; > @@ -11215,10 +11214,6 @@ static void bnxt_free_irq(struct bnxt *bp) > struct bnxt_irq *irq; > int i; > > -#ifdef CONFIG_RFS_ACCEL > - free_irq_cpu_rmap(bp->dev->rx_cpu_rmap); > - bp->dev->rx_cpu_rmap = NULL; > -#endif > if (!bp->irq_tbl || !bp->bnapi) > return; > > @@ -11241,11 +11236,8 @@ static 
void bnxt_free_irq(struct bnxt *bp) > > static int bnxt_request_irq(struct bnxt *bp) { > - int i, j, rc = 0; > + int i, rc = 0; > unsigned long flags = 0; > -#ifdef CONFIG_RFS_ACCEL > - struct cpu_rmap *rmap; > -#endif > > rc = bnxt_setup_int_mode(bp); > if (rc) { > @@ -11253,22 +11245,11 @@ static int bnxt_request_irq(struct bnxt *bp) > rc); > return rc; > } > -#ifdef CONFIG_RFS_ACCEL > - rmap = bp->dev->rx_cpu_rmap; > -#endif > - for (i = 0, j = 0; i < bp->cp_nr_rings; i++) { > + > + for (i = 0; i < bp->cp_nr_rings; i++) { > int map_idx = bnxt_cp_num_to_irq_num(bp, i); > struct bnxt_irq *irq = &bp->irq_tbl[map_idx]; > > -#ifdef CONFIG_RFS_ACCEL > - if (rmap && bp->bnapi[i]->rx_ring) { > - rc = irq_cpu_rmap_add(rmap, irq->vector); > - if (rc) > - netdev_warn(bp->dev, "failed adding irq rmap for ring > %d\n", > - j); > - j++; > - } > -#endif > rc = request_irq(irq->vector, irq->handler, flags, irq->name, > bp->bnapi[i]); > if (rc) > diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c > b/drivers/net/ethernet/intel/ice/ice_arfs.c > index 7cee365cc7d1..3b1b892e6958 100644 > --- a/drivers/net/ethernet/intel/ice/ice_arfs.c > +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c > @@ -584,9 +584,6 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) > netdev = vsi->netdev; > if (!netdev || !netdev->rx_cpu_rmap) > return; > - > - free_irq_cpu_rmap(netdev->rx_cpu_rmap); > - netdev->rx_cpu_rmap = NULL; > } > > /** > @@ -597,7 +594,6 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) { > struct net_device *netdev; > struct ice_pf *pf; > - int i; > > if (!vsi || vsi->type != ICE_VSI_PF) > return 0; > @@ -610,18 +606,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) > netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s, > q_vectors %d\n", > vsi->type, netdev->name, vsi->num_q_vectors); > > - netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors); > - if (unlikely(!netdev->rx_cpu_rmap)) > - return -EINVAL; > - > - ice_for_each_q_vector(vsi, i) > - if 
(irq_cpu_rmap_add(netdev->rx_cpu_rmap, > - vsi->q_vectors[i]->irq.virq)) { > - ice_free_cpu_rx_rmap(vsi); > - return -EINVAL; > - } > - > - return 0; > + return netif_enable_cpu_rmap(netdev, vsi->num_q_vectors); > } > > /** > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index > aeb4a6cff171..7e95e9ee36dd 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -1991,6 +1991,9 @@ enum netdev_reg_state { > * > * @threaded: napi threaded mode is enabled > * > + * @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap. > + * Set by calling netif_enable_cpu_rmap(). > + * > * @see_all_hwtstamp_requests: device wants to see calls to > * ndo_hwtstamp_set() for all timestamp requests > * regardless of source, even if those aren't > @@ -2398,6 +2401,9 @@ struct net_device { > struct lock_class_key *qdisc_tx_busylock; > bool proto_down; > bool threaded; > +#ifdef CONFIG_RFS_ACCEL > + bool rx_cpu_rmap_auto; > +#endif > > /* priv_flags_slow, ungrouped to save space */ > unsigned long see_all_hwtstamp_requests:1; > @@ -2671,10 +2677,7 @@ void netif_queue_set_napi(struct net_device > *dev, unsigned int queue_index, > enum netdev_queue_type type, > struct napi_struct *napi); > > -static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) -{ > - napi->irq = irq; > -} > +void netif_napi_set_irq(struct napi_struct *napi, int irq); > > /* Default NAPI poll() weight > * Device drivers are strongly advised to not use bigger value @@ -2765,6 > +2768,10 @@ static inline void netif_napi_del(struct napi_struct *napi) > synchronize_net(); > } > > +#ifdef CONFIG_RFS_ACCEL > +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int > +num_irqs); > + > +#endif > struct packet_type { > __be16 type; /* This is really htons(ether_type). 
*/ > bool ignore_outgoing; > diff --git a/net/core/dev.c b/net/core/dev.c index > 1a90ed8cc6cc..3ee7a514dca8 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -6745,6 +6745,46 @@ void netif_queue_set_napi(struct net_device > *dev, unsigned int queue_index, } > EXPORT_SYMBOL(netif_queue_set_napi); > > +#ifdef CONFIG_RFS_ACCEL > +static void netif_disable_cpu_rmap(struct net_device *dev) { > + free_irq_cpu_rmap(dev->rx_cpu_rmap); > + dev->rx_cpu_rmap = NULL; > + dev->rx_cpu_rmap_auto = false; > +} > + > +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int > +num_irqs) { > + dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs); > + if (!dev->rx_cpu_rmap) > + return -ENOMEM; > + > + dev->rx_cpu_rmap_auto = true; > + return 0; > +} > +EXPORT_SYMBOL(netif_enable_cpu_rmap); > +#endif > + > +void netif_napi_set_irq(struct napi_struct *napi, int irq) { #ifdef > +CONFIG_RFS_ACCEL > + int rc; > +#endif > + napi->irq = irq; > + > +#ifdef CONFIG_RFS_ACCEL > + if (napi->dev->rx_cpu_rmap && napi->dev->rx_cpu_rmap_auto) { > + rc = irq_cpu_rmap_add(napi->dev->rx_cpu_rmap, irq); > + if (rc) { > + netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n", > + rc); > + netif_disable_cpu_rmap(napi->dev); > + } > + } > +#endif > +} > +EXPORT_SYMBOL(netif_napi_set_irq); > + > static void napi_restore_config(struct napi_struct *n) { > n->defer_hard_irqs = n->config->defer_hard_irqs; @@ -11421,6 > +11461,10 @@ void free_netdev(struct net_device *dev) > /* Flush device addresses */ > dev_addr_flush(dev); > > +#ifdef CONFIG_RFS_ACCEL > + if (dev->rx_cpu_rmap && dev->rx_cpu_rmap_auto) > + netif_disable_cpu_rmap(dev); #endif > list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) > netif_napi_del(p); > > -- > 2.43.0 Thanks for making the change in the ENA driver. Acked-by: David Arinzon <darinzon@amazon.com>
On Mon, 13 Jan 2025 10:10:37 -0700 Ahmed Zaki wrote: > -#endif /* CONFIG_RFS_ACCEL */ > + return netif_enable_cpu_rmap(adapter->netdev, adapter->num_io_queues); > +#else > return 0; > +#endif /* CONFIG_RFS_ACCEL */ Let's try to eliminate some of the ifdef-ery on the driver side. netif_enable_cpu_rmap() should simply do nothing if !CONFIG_RFS_ACCEL > @@ -2398,6 +2401,9 @@ struct net_device { > struct lock_class_key *qdisc_tx_busylock; > bool proto_down; > bool threaded; > +#ifdef CONFIG_RFS_ACCEL > + bool rx_cpu_rmap_auto; > +#endif similar point, don't hide it, it's just one byte and we can just leave it as false if !CONFIG_RFS_ACCEL. It can save us a bunch of other ifdefs > +#ifdef CONFIG_RFS_ACCEL > +static void netif_disable_cpu_rmap(struct net_device *dev) > +{ > + free_irq_cpu_rmap(dev->rx_cpu_rmap); > + dev->rx_cpu_rmap = NULL; > + dev->rx_cpu_rmap_auto = false; > +} Better to do: static void netif_disable_cpu_rmap(struct net_device *dev) { #ifdef CONFIG_RFS_ACCEL free_irq_cpu_rmap(dev->rx_cpu_rmap); dev->rx_cpu_rmap = NULL; dev->rx_cpu_rmap_auto = false; #endif } IOW if not relevant the function should do nothing > +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs) > +{ > + dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs); > + if (!dev->rx_cpu_rmap) > + return -ENOMEM; > + > + dev->rx_cpu_rmap_auto = true; > + return 0; > +} > +EXPORT_SYMBOL(netif_enable_cpu_rmap); here you can depend on dead code elimination: int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs) { if (!IS_ENABLED(CONFIG_RFS_ACCEL)) return 0; ... 
} > +#endif > + > +void netif_napi_set_irq(struct napi_struct *napi, int irq) > +{ > +#ifdef CONFIG_RFS_ACCEL > + int rc; > +#endif > + napi->irq = irq; > + > +#ifdef CONFIG_RFS_ACCEL > + if (napi->dev->rx_cpu_rmap && napi->dev->rx_cpu_rmap_auto) { > + rc = irq_cpu_rmap_add(napi->dev->rx_cpu_rmap, irq); > + if (rc) { > + netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n", > + rc); > + netif_disable_cpu_rmap(napi->dev); > + } > + } > +#endif Declare rc inside the if to avoid the extra ifdef on variable decl > +} > +EXPORT_SYMBOL(netif_napi_set_irq); > + > static void napi_restore_config(struct napi_struct *n) > { > n->defer_hard_irqs = n->config->defer_hard_irqs; > @@ -11421,6 +11461,10 @@ void free_netdev(struct net_device *dev) > /* Flush device addresses */ > dev_addr_flush(dev); > > +#ifdef CONFIG_RFS_ACCEL > + if (dev->rx_cpu_rmap && dev->rx_cpu_rmap_auto) don't check dev->rx_cpu_rmap, dev->rx_cpu_rmap_auto is enough > + netif_disable_cpu_rmap(dev); > +#endif > list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) > netif_napi_del(p); > IRQs are often allocated in ndo_open and freed in ndo_stop, so you need to catch netif_napi_del or napi_disable and remove the IRQ from the map. Similarly netif_napi_set_irq() may get called with -1 to clear the IRQ number, which you currently treat as a real IRQ id, AFAICT.
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index c1295dfad0d0..a3fceaa83cd5 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -5,9 +5,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#ifdef CONFIG_RFS_ACCEL -#include <linux/cpu_rmap.h> -#endif /* CONFIG_RFS_ACCEL */ #include <linux/ethtool.h> #include <linux/kernel.h> #include <linux/module.h> @@ -165,25 +162,10 @@ int ena_xmit_common(struct ena_adapter *adapter, static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) { #ifdef CONFIG_RFS_ACCEL - u32 i; - int rc; - - adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues); - if (!adapter->netdev->rx_cpu_rmap) - return -ENOMEM; - for (i = 0; i < adapter->num_io_queues; i++) { - int irq_idx = ENA_IO_IRQ_IDX(i); - - rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap, - pci_irq_vector(adapter->pdev, irq_idx)); - if (rc) { - free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); - adapter->netdev->rx_cpu_rmap = NULL; - return rc; - } - } -#endif /* CONFIG_RFS_ACCEL */ + return netif_enable_cpu_rmap(adapter->netdev, adapter->num_io_queues); +#else return 0; +#endif /* CONFIG_RFS_ACCEL */ } static void ena_init_io_rings_common(struct ena_adapter *adapter, @@ -1742,13 +1724,6 @@ static void ena_free_io_irq(struct ena_adapter *adapter) struct ena_irq *irq; int i; -#ifdef CONFIG_RFS_ACCEL - if (adapter->msix_vecs >= 1) { - free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); - adapter->netdev->rx_cpu_rmap = NULL; - } -#endif /* CONFIG_RFS_ACCEL */ - for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) { irq = &adapter->irq_tbl[i]; irq_set_affinity_hint(irq->vector, NULL); @@ -4131,13 +4106,6 @@ static void __ena_shutoff(struct pci_dev *pdev, bool shutdown) ena_dev = adapter->ena_dev; netdev = adapter->netdev; -#ifdef CONFIG_RFS_ACCEL - if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) { - 
free_irq_cpu_rmap(netdev->rx_cpu_rmap); - netdev->rx_cpu_rmap = NULL; - } - -#endif /* CONFIG_RFS_ACCEL */ /* Make sure timer and reset routine won't be called after * freeing device resources. */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 884d42db5554..1f50bc715038 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -49,7 +49,6 @@ #include <linux/cache.h> #include <linux/log2.h> #include <linux/bitmap.h> -#include <linux/cpu_rmap.h> #include <linux/cpumask.h> #include <net/pkt_cls.h> #include <net/page_pool/helpers.h> @@ -10861,7 +10860,7 @@ static int bnxt_set_real_num_queues(struct bnxt *bp) #ifdef CONFIG_RFS_ACCEL if (bp->flags & BNXT_FLAG_RFS) - dev->rx_cpu_rmap = alloc_irq_cpu_rmap(bp->rx_nr_rings); + return netif_enable_cpu_rmap(dev, bp->rx_nr_rings); #endif return rc; @@ -11215,10 +11214,6 @@ static void bnxt_free_irq(struct bnxt *bp) struct bnxt_irq *irq; int i; -#ifdef CONFIG_RFS_ACCEL - free_irq_cpu_rmap(bp->dev->rx_cpu_rmap); - bp->dev->rx_cpu_rmap = NULL; -#endif if (!bp->irq_tbl || !bp->bnapi) return; @@ -11241,11 +11236,8 @@ static void bnxt_free_irq(struct bnxt *bp) static int bnxt_request_irq(struct bnxt *bp) { - int i, j, rc = 0; + int i, rc = 0; unsigned long flags = 0; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif rc = bnxt_setup_int_mode(bp); if (rc) { @@ -11253,22 +11245,11 @@ static int bnxt_request_irq(struct bnxt *bp) rc); return rc; } -#ifdef CONFIG_RFS_ACCEL - rmap = bp->dev->rx_cpu_rmap; -#endif - for (i = 0, j = 0; i < bp->cp_nr_rings; i++) { + + for (i = 0; i < bp->cp_nr_rings; i++) { int map_idx = bnxt_cp_num_to_irq_num(bp, i); struct bnxt_irq *irq = &bp->irq_tbl[map_idx]; -#ifdef CONFIG_RFS_ACCEL - if (rmap && bp->bnapi[i]->rx_ring) { - rc = irq_cpu_rmap_add(rmap, irq->vector); - if (rc) - netdev_warn(bp->dev, "failed adding irq rmap for ring %d\n", - j); - j++; - } -#endif rc = request_irq(irq->vector, 
irq->handler, flags, irq->name, bp->bnapi[i]); if (rc) diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 7cee365cc7d1..3b1b892e6958 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -584,9 +584,6 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) netdev = vsi->netdev; if (!netdev || !netdev->rx_cpu_rmap) return; - - free_irq_cpu_rmap(netdev->rx_cpu_rmap); - netdev->rx_cpu_rmap = NULL; } /** @@ -597,7 +594,6 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) { struct net_device *netdev; struct ice_pf *pf; - int i; if (!vsi || vsi->type != ICE_VSI_PF) return 0; @@ -610,18 +606,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s, q_vectors %d\n", vsi->type, netdev->name, vsi->num_q_vectors); - netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors); - if (unlikely(!netdev->rx_cpu_rmap)) - return -EINVAL; - - ice_for_each_q_vector(vsi, i) - if (irq_cpu_rmap_add(netdev->rx_cpu_rmap, - vsi->q_vectors[i]->irq.virq)) { - ice_free_cpu_rx_rmap(vsi); - return -EINVAL; - } - - return 0; + return netif_enable_cpu_rmap(netdev, vsi->num_q_vectors); } /** diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aeb4a6cff171..7e95e9ee36dd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1991,6 +1991,9 @@ enum netdev_reg_state { * * @threaded: napi threaded mode is enabled * + * @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap. + * Set by calling netif_enable_cpu_rmap(). 
+ * * @see_all_hwtstamp_requests: device wants to see calls to * ndo_hwtstamp_set() for all timestamp requests * regardless of source, even if those aren't @@ -2398,6 +2401,9 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; bool proto_down; bool threaded; +#ifdef CONFIG_RFS_ACCEL + bool rx_cpu_rmap_auto; +#endif /* priv_flags_slow, ungrouped to save space */ unsigned long see_all_hwtstamp_requests:1; @@ -2671,10 +2677,7 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, enum netdev_queue_type type, struct napi_struct *napi); -static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) -{ - napi->irq = irq; -} +void netif_napi_set_irq(struct napi_struct *napi, int irq); /* Default NAPI poll() weight * Device drivers are strongly advised to not use bigger value @@ -2765,6 +2768,10 @@ static inline void netif_napi_del(struct napi_struct *napi) synchronize_net(); } +#ifdef CONFIG_RFS_ACCEL +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs); + +#endif struct packet_type { __be16 type; /* This is really htons(ether_type). 
*/ bool ignore_outgoing; diff --git a/net/core/dev.c b/net/core/dev.c index 1a90ed8cc6cc..3ee7a514dca8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6745,6 +6745,46 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, } EXPORT_SYMBOL(netif_queue_set_napi); +#ifdef CONFIG_RFS_ACCEL +static void netif_disable_cpu_rmap(struct net_device *dev) +{ + free_irq_cpu_rmap(dev->rx_cpu_rmap); + dev->rx_cpu_rmap = NULL; + dev->rx_cpu_rmap_auto = false; +} + +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs) +{ + dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs); + if (!dev->rx_cpu_rmap) + return -ENOMEM; + + dev->rx_cpu_rmap_auto = true; + return 0; +} +EXPORT_SYMBOL(netif_enable_cpu_rmap); +#endif + +void netif_napi_set_irq(struct napi_struct *napi, int irq) +{ +#ifdef CONFIG_RFS_ACCEL + int rc; +#endif + napi->irq = irq; + +#ifdef CONFIG_RFS_ACCEL + if (napi->dev->rx_cpu_rmap && napi->dev->rx_cpu_rmap_auto) { + rc = irq_cpu_rmap_add(napi->dev->rx_cpu_rmap, irq); + if (rc) { + netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n", + rc); + netif_disable_cpu_rmap(napi->dev); + } + } +#endif +} +EXPORT_SYMBOL(netif_napi_set_irq); + static void napi_restore_config(struct napi_struct *n) { n->defer_hard_irqs = n->config->defer_hard_irqs; @@ -11421,6 +11461,10 @@ void free_netdev(struct net_device *dev) /* Flush device addresses */ dev_addr_flush(dev); +#ifdef CONFIG_RFS_ACCEL + if (dev->rx_cpu_rmap && dev->rx_cpu_rmap_auto) + netif_disable_cpu_rmap(dev); +#endif list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p);
Add a new netdev flag "rx_cpu_rmap_auto". Drivers supporting ARFS should set the flag via netif_enable_cpu_rmap() and core will allocate and manage the ARFS rmap. Freeing the rmap is also done by core when the netdev is freed. Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com> --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 38 ++--------------- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 27 ++---------- drivers/net/ethernet/intel/ice/ice_arfs.c | 17 +------- include/linux/netdevice.h | 15 +++++-- net/core/dev.c | 44 ++++++++++++++++++++ 5 files changed, 63 insertions(+), 78 deletions(-)