| Message ID | 20250113171042.158123-2-ahmed.zaki@intel.com (mailing list archive) |
|---|---|
| State | Changes Requested |
| Delegated to | Netdev Maintainers |
| Series | net: napi: add CPU affinity to napi->config |
> Add a new netdev flag "rx_cpu_rmap_auto". Drivers supporting ARFS should
> set the flag via netif_enable_cpu_rmap() and core will allocate and manage
> the ARFS rmap. Freeing the rmap is also done by core when the netdev is
> freed.
>
> Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>

[patch body snipped; the full diff is reproduced below]

Thanks for making the change in the ENA driver.

Acked-by: David Arinzon <darinzon@amazon.com>
On Mon, 13 Jan 2025 10:10:37 -0700 Ahmed Zaki wrote:
> -#endif /* CONFIG_RFS_ACCEL */
> +	return netif_enable_cpu_rmap(adapter->netdev, adapter->num_io_queues);
> +#else
>  	return 0;
> +#endif /* CONFIG_RFS_ACCEL */

Let's try to eliminate some of the ifdef-ery on the driver side.
netif_enable_cpu_rmap() should simply do nothing if !CONFIG_RFS_ACCEL

> @@ -2398,6 +2401,9 @@ struct net_device {
>  	struct lock_class_key	*qdisc_tx_busylock;
>  	bool			proto_down;
>  	bool			threaded;
> +#ifdef CONFIG_RFS_ACCEL
> +	bool			rx_cpu_rmap_auto;
> +#endif

similar point, don't hide it, it's just one byte and we can just leave
it as false if !CONFIG_RFS_ACCEL. It can save us a bunch of other ifdefs

> +#ifdef CONFIG_RFS_ACCEL
> +static void netif_disable_cpu_rmap(struct net_device *dev)
> +{
> +	free_irq_cpu_rmap(dev->rx_cpu_rmap);
> +	dev->rx_cpu_rmap = NULL;
> +	dev->rx_cpu_rmap_auto = false;
> +}

Better to do:

static void netif_disable_cpu_rmap(struct net_device *dev)
{
#ifdef CONFIG_RFS_ACCEL
	free_irq_cpu_rmap(dev->rx_cpu_rmap);
	dev->rx_cpu_rmap = NULL;
	dev->rx_cpu_rmap_auto = false;
#endif
}

IOW if not relevant the function should do nothing

> +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs)
> +{
> +	dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs);
> +	if (!dev->rx_cpu_rmap)
> +		return -ENOMEM;
> +
> +	dev->rx_cpu_rmap_auto = true;
> +	return 0;
> +}
> +EXPORT_SYMBOL(netif_enable_cpu_rmap);

here you can depend on dead code elimination:

int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs)
{
	if (!IS_ENABLED(CONFIG_RFS_ACCEL))
		return 0;

	...
}

> +#endif
> +
> +void netif_napi_set_irq(struct napi_struct *napi, int irq)
> +{
> +#ifdef CONFIG_RFS_ACCEL
> +	int rc;
> +#endif
> +	napi->irq = irq;
> +
> +#ifdef CONFIG_RFS_ACCEL
> +	if (napi->dev->rx_cpu_rmap && napi->dev->rx_cpu_rmap_auto) {
> +		rc = irq_cpu_rmap_add(napi->dev->rx_cpu_rmap, irq);
> +		if (rc) {
> +			netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n",
> +				    rc);
> +			netif_disable_cpu_rmap(napi->dev);
> +		}
> +	}
> +#endif

Declare rc inside the if to avoid the extra ifdef on variable decl

> +}
> +EXPORT_SYMBOL(netif_napi_set_irq);
> +
>  static void napi_restore_config(struct napi_struct *n)
>  {
>  	n->defer_hard_irqs = n->config->defer_hard_irqs;
> @@ -11421,6 +11461,10 @@ void free_netdev(struct net_device *dev)
>  	/* Flush device addresses */
>  	dev_addr_flush(dev);
>
> +#ifdef CONFIG_RFS_ACCEL
> +	if (dev->rx_cpu_rmap && dev->rx_cpu_rmap_auto)

don't check dev->rx_cpu_rmap, dev->rx_cpu_rmap_auto is enough

> +		netif_disable_cpu_rmap(dev);
> +#endif
>  	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
>  		netif_napi_del(p);
>

IRQs are often allocated in ndo_open and freed in ndo_stop, so
you need to catch netif_napi_del or napi_disable and remove
the IRQ from the map.

Similarly netif_napi_set_irq() may get called with -1 to clear
the IRQ number, which you currently treat as a real IRQ id, AFAICT.
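As a concrete illustration of the first point, once the core helper handles
!CONFIG_RFS_ACCEL internally, the ena hunk quoted above could drop its
remaining #ifdef/#else entirely. A minimal sketch of the driver side under
that assumption (illustrative only, not the code posted in this revision):

static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
{
	/* No driver-side ifdef: the core helper is expected to return 0
	 * (and allocate nothing) when CONFIG_RFS_ACCEL is not set.
	 */
	return netif_enable_cpu_rmap(adapter->netdev, adapter->num_io_queues);
}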
On 2025-01-14 3:08 p.m., Jakub Kicinski wrote:
> On Mon, 13 Jan 2025 10:10:37 -0700 Ahmed Zaki wrote:
>> -#endif /* CONFIG_RFS_ACCEL */
>> +	return netif_enable_cpu_rmap(adapter->netdev, adapter->num_io_queues);
>> +#else
>>  	return 0;
>> +#endif /* CONFIG_RFS_ACCEL */
>
> Let's try to eliminate some of the ifdef-ery on the driver side.
> netif_enable_cpu_rmap() should simply do nothing if !CONFIG_RFS_ACCEL
>
>> @@ -2398,6 +2401,9 @@ struct net_device {
>>  	struct lock_class_key	*qdisc_tx_busylock;
>>  	bool			proto_down;
>>  	bool			threaded;
>> +#ifdef CONFIG_RFS_ACCEL
>> +	bool			rx_cpu_rmap_auto;
>> +#endif
>
> similar point, don't hide it, it's just one byte and we can just leave
> it as false if !CONFIG_RFS_ACCEL. It can save us a bunch of other ifdefs

Ok, makes sense.

>
>> +#ifdef CONFIG_RFS_ACCEL
>> +static void netif_disable_cpu_rmap(struct net_device *dev)
>> +{
>> +	free_irq_cpu_rmap(dev->rx_cpu_rmap);
>> +	dev->rx_cpu_rmap = NULL;
>> +	dev->rx_cpu_rmap_auto = false;
>> +}
>
> Better to do:
>
> static void netif_disable_cpu_rmap(struct net_device *dev)
> {
> #ifdef CONFIG_RFS_ACCEL
> 	free_irq_cpu_rmap(dev->rx_cpu_rmap);
> 	dev->rx_cpu_rmap = NULL;
> 	dev->rx_cpu_rmap_auto = false;
> #endif
> }

Sure.

>
> IOW if not relevant the function should do nothing
>
>> +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs)
>> +{
>> +	dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs);
>> +	if (!dev->rx_cpu_rmap)
>> +		return -ENOMEM;
>> +
>> +	dev->rx_cpu_rmap_auto = true;
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL(netif_enable_cpu_rmap);
>
> here you can depend on dead code elimination:
>
> int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs)
> {
> 	if (!IS_ENABLED(CONFIG_RFS_ACCEL))
> 		return 0;
>
> 	...
> }
>

netdev->rx_cpu_rmap is declared inside #ifdef CONFIG_RFS_ACCEL, so I
still need #ifdef inside netif_enable_cpu_rmap(). I will do the same as
in netif_disable_cpu_rmap() though, that will keep the function visible.

>> +#endif
>> +
>> +void netif_napi_set_irq(struct napi_struct *napi, int irq)
>> +{
>> +#ifdef CONFIG_RFS_ACCEL
>> +	int rc;
>> +#endif
>> +	napi->irq = irq;
>> +
>> +#ifdef CONFIG_RFS_ACCEL
>> +	if (napi->dev->rx_cpu_rmap && napi->dev->rx_cpu_rmap_auto) {
>> +		rc = irq_cpu_rmap_add(napi->dev->rx_cpu_rmap, irq);
>> +		if (rc) {
>> +			netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n",
>> +				    rc);
>> +			netif_disable_cpu_rmap(napi->dev);
>> +		}
>> +	}
>> +#endif
>
> Declare rc inside the if to avoid the extra ifdef on variable decl

The CONFIG_RFS_ACCEL ifdef is removed in a later patch (3) when
irq_affinity_auto is introduced and rc is re-used. Instead, I will move
"napi->irq = irq;" to the end and merge the 2 RFS_ACCEL blocks.

>
>> +}
>> +EXPORT_SYMBOL(netif_napi_set_irq);
>> +
>>  static void napi_restore_config(struct napi_struct *n)
>>  {
>>  	n->defer_hard_irqs = n->config->defer_hard_irqs;
>> @@ -11421,6 +11461,10 @@ void free_netdev(struct net_device *dev)
>>  	/* Flush device addresses */
>>  	dev_addr_flush(dev);
>>
>> +#ifdef CONFIG_RFS_ACCEL
>> +	if (dev->rx_cpu_rmap && dev->rx_cpu_rmap_auto)
>
> don't check dev->rx_cpu_rmap, dev->rx_cpu_rmap_auto is enough

Yes, also a good point.

>
>> +		netif_disable_cpu_rmap(dev);
>> +#endif
>>  	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
>>  		netif_napi_del(p);
>>
>
> IRQs are often allocated in ndo_open and freed in ndo_stop, so
> you need to catch netif_napi_del or napi_disable and remove
> the IRQ from the map.

Ok, I will look into that too.
>
> Similarly netif_napi_set_irq() may get called with -1 to clear
> the IRQ number, which you currently treat as a real IRQ id, AFAICT.

Correct, there is no handling for irq = -1. So netif_napi_set_irq()
needs to add the irq to the rmap only if it is > 0.

I need to clarify the expectation of netif_napi_set_irq(), because I only
see it called with irq = -1 in napi_add_weight(). But you say it can be
called with irq = -1 to "clear" the IRQ.

Does this mean that, if irq = -1, we need to "delete" the irq from the
rmap if a valid irq already existed (which means this can happen as an
alternative to napi_del()/napi_disable())? Or is just skipping adding the
irq to the rmap enough?

Thanks,
Ahmed
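A sketch of what the rework described above might look like, merging the
two CONFIG_RFS_ACCEL blocks, assigning napi->irq last, and only touching
the rmap for a real IRQ (irq > 0). This is illustrative only, assuming the
rx_cpu_rmap_auto flag is declared unconditionally as requested, and is not
the code posted in this revision:

void netif_napi_set_irq(struct napi_struct *napi, int irq)
{
#ifdef CONFIG_RFS_ACCEL
	/* irq == -1 (or any negative value) just clears the NAPI's IRQ
	 * number below; only a valid IRQ is added to the ARFS rmap.
	 */
	if (irq > 0 && napi->dev->rx_cpu_rmap_auto) {
		int rc = irq_cpu_rmap_add(napi->dev->rx_cpu_rmap, irq);

		if (rc) {
			netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n",
				    rc);
			netif_disable_cpu_rmap(napi->dev);
		}
	}
#endif

	napi->irq = irq;
}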
On Tue, 14 Jan 2025 18:00:30 -0700 Ahmed Zaki wrote:
> > Similarly netif_napi_set_irq() may get called with -1 to clear
> > the IRQ number, which you currently treat as a real IRQ id, AFAICT.
>
> Correct, there is no handling for irq = -1. So netif_napi_set_irq() needs
> to add the irq to the rmap only if it is > 0.
>
> I need to clarify the expectation of netif_napi_set_irq(), because I only
> see it called with irq = -1 in napi_add_weight(). But you say it can be
> called with irq = -1 to "clear" the IRQ.

I _think_ that's what Amritha had in mind. For queue <> NAPI linking
similarly we are expected to call the same helper with a NULL param.

> Does this mean that, if irq = -1, we need to "delete" the irq from the
> rmap if a valid irq already existed (which means this can happen as an
> alternative to napi_del()/napi_disable())? Or is just skipping adding
> the irq to the rmap enough?

I'm afraid we need both.

Most drivers today simply never clear the IRQ, they will just delete
the NAPI and kfree() its memory. So we need to "catch" NAPIs with IRQs
assigned getting deleted and clean up the IRQ.

In the future some drivers may explicitly call the set with -1,
especially now that the IRQ has more implications than just getting
reported via netlink. We need to support that, too.

And for good measure we should also throw in a warning if a driver
tries to set the IRQ but the IRQ is already set in the NAPI (not -1).
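Roughly, the cleanup being asked for could take the shape below. Note that
lib/cpu_rmap has no per-IRQ removal helper at this point in the series, so
irq_cpu_rmap_remove() here is a hypothetical addition (it would essentially
detach the IRQ's affinity notifier), and the function name is illustrative:

/* Sketch only: remove a NAPI's IRQ from the auto-managed ARFS rmap.
 * irq_cpu_rmap_remove() is assumed, not an existing lib/cpu_rmap API.
 */
static void netif_napi_irq_rmap_del(struct napi_struct *napi)
{
#ifdef CONFIG_RFS_ACCEL
	if (!napi->dev->rx_cpu_rmap_auto || napi->irq <= 0)
		return;

	irq_cpu_rmap_remove(napi->dev->rx_cpu_rmap, napi->irq);
#endif
}

The core would then call such a helper from __netif_napi_del() (to catch
NAPIs deleted with an IRQ still assigned) and from netif_napi_set_irq()
when a driver passes -1, and netif_napi_set_irq() could additionally warn
if asked to set an IRQ while napi->irq is still a valid IRQ.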
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index c1295dfad0d0..a3fceaa83cd5 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -5,9 +5,6 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#ifdef CONFIG_RFS_ACCEL
-#include <linux/cpu_rmap.h>
-#endif /* CONFIG_RFS_ACCEL */
 #include <linux/ethtool.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -165,25 +162,10 @@ int ena_xmit_common(struct ena_adapter *adapter,
 static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
 {
 #ifdef CONFIG_RFS_ACCEL
-	u32 i;
-	int rc;
-
-	adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues);
-	if (!adapter->netdev->rx_cpu_rmap)
-		return -ENOMEM;
-	for (i = 0; i < adapter->num_io_queues; i++) {
-		int irq_idx = ENA_IO_IRQ_IDX(i);
-
-		rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
-				      pci_irq_vector(adapter->pdev, irq_idx));
-		if (rc) {
-			free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
-			adapter->netdev->rx_cpu_rmap = NULL;
-			return rc;
-		}
-	}
-#endif /* CONFIG_RFS_ACCEL */
+	return netif_enable_cpu_rmap(adapter->netdev, adapter->num_io_queues);
+#else
 	return 0;
+#endif /* CONFIG_RFS_ACCEL */
 }
 
 static void ena_init_io_rings_common(struct ena_adapter *adapter,
@@ -1742,13 +1724,6 @@ static void ena_free_io_irq(struct ena_adapter *adapter)
 	struct ena_irq *irq;
 	int i;
 
-#ifdef CONFIG_RFS_ACCEL
-	if (adapter->msix_vecs >= 1) {
-		free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
-		adapter->netdev->rx_cpu_rmap = NULL;
-	}
-#endif /* CONFIG_RFS_ACCEL */
-
 	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
 		irq = &adapter->irq_tbl[i];
 		irq_set_affinity_hint(irq->vector, NULL);
@@ -4131,13 +4106,6 @@ static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
 	ena_dev = adapter->ena_dev;
 	netdev = adapter->netdev;
 
-#ifdef CONFIG_RFS_ACCEL
-	if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
-		free_irq_cpu_rmap(netdev->rx_cpu_rmap);
-		netdev->rx_cpu_rmap = NULL;
-	}
-
-#endif /* CONFIG_RFS_ACCEL */
 	/* Make sure timer and reset routine won't be called after
 	 * freeing device resources.
 	 */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 884d42db5554..1f50bc715038 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -49,7 +49,6 @@
 #include <linux/cache.h>
 #include <linux/log2.h>
 #include <linux/bitmap.h>
-#include <linux/cpu_rmap.h>
 #include <linux/cpumask.h>
 #include <net/pkt_cls.h>
 #include <net/page_pool/helpers.h>
@@ -10861,7 +10860,7 @@ static int bnxt_set_real_num_queues(struct bnxt *bp)
 
 #ifdef CONFIG_RFS_ACCEL
 	if (bp->flags & BNXT_FLAG_RFS)
-		dev->rx_cpu_rmap = alloc_irq_cpu_rmap(bp->rx_nr_rings);
+		return netif_enable_cpu_rmap(dev, bp->rx_nr_rings);
 #endif
 
 	return rc;
@@ -11215,10 +11214,6 @@ static void bnxt_free_irq(struct bnxt *bp)
 	struct bnxt_irq *irq;
 	int i;
 
-#ifdef CONFIG_RFS_ACCEL
-	free_irq_cpu_rmap(bp->dev->rx_cpu_rmap);
-	bp->dev->rx_cpu_rmap = NULL;
-#endif
 	if (!bp->irq_tbl || !bp->bnapi)
 		return;
 
@@ -11241,11 +11236,8 @@ static void bnxt_free_irq(struct bnxt *bp)
 
 static int bnxt_request_irq(struct bnxt *bp)
 {
-	int i, j, rc = 0;
+	int i, rc = 0;
 	unsigned long flags = 0;
-#ifdef CONFIG_RFS_ACCEL
-	struct cpu_rmap *rmap;
-#endif
 
 	rc = bnxt_setup_int_mode(bp);
 	if (rc) {
@@ -11253,22 +11245,11 @@ static int bnxt_request_irq(struct bnxt *bp)
 			   rc);
 		return rc;
 	}
-#ifdef CONFIG_RFS_ACCEL
-	rmap = bp->dev->rx_cpu_rmap;
-#endif
-	for (i = 0, j = 0; i < bp->cp_nr_rings; i++) {
+
+	for (i = 0; i < bp->cp_nr_rings; i++) {
 		int map_idx = bnxt_cp_num_to_irq_num(bp, i);
 		struct bnxt_irq *irq = &bp->irq_tbl[map_idx];
 
-#ifdef CONFIG_RFS_ACCEL
-		if (rmap && bp->bnapi[i]->rx_ring) {
-			rc = irq_cpu_rmap_add(rmap, irq->vector);
-			if (rc)
-				netdev_warn(bp->dev, "failed adding irq rmap for ring %d\n",
-					    j);
-			j++;
-		}
-#endif
 		rc = request_irq(irq->vector, irq->handler, flags, irq->name,
 				 bp->bnapi[i]);
 		if (rc)
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c
index 7cee365cc7d1..3b1b892e6958 100644
--- a/drivers/net/ethernet/intel/ice/ice_arfs.c
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
@@ -584,9 +584,6 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi)
 	netdev = vsi->netdev;
 	if (!netdev || !netdev->rx_cpu_rmap)
 		return;
-
-	free_irq_cpu_rmap(netdev->rx_cpu_rmap);
-	netdev->rx_cpu_rmap = NULL;
 }
 
 /**
@@ -597,7 +594,6 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
 {
 	struct net_device *netdev;
 	struct ice_pf *pf;
-	int i;
 
 	if (!vsi || vsi->type != ICE_VSI_PF)
 		return 0;
@@ -610,18 +606,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
 	netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s, q_vectors %d\n",
 		   vsi->type, netdev->name, vsi->num_q_vectors);
 
-	netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors);
-	if (unlikely(!netdev->rx_cpu_rmap))
-		return -EINVAL;
-
-	ice_for_each_q_vector(vsi, i)
-		if (irq_cpu_rmap_add(netdev->rx_cpu_rmap,
-				     vsi->q_vectors[i]->irq.virq)) {
-			ice_free_cpu_rx_rmap(vsi);
-			return -EINVAL;
-		}
-
-	return 0;
+	return netif_enable_cpu_rmap(netdev, vsi->num_q_vectors);
 }
 
 /**
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index aeb4a6cff171..7e95e9ee36dd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1991,6 +1991,9 @@ enum netdev_reg_state {
  *
  *	@threaded:	napi threaded mode is enabled
  *
+ *	@rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap.
+ *			   Set by calling netif_enable_cpu_rmap().
+ *
  *	@see_all_hwtstamp_requests: device wants to see calls to
  *			ndo_hwtstamp_set() for all timestamp requests
  *			regardless of source, even if those aren't
@@ -2398,6 +2401,9 @@ struct net_device {
 	struct lock_class_key	*qdisc_tx_busylock;
 	bool			proto_down;
 	bool			threaded;
+#ifdef CONFIG_RFS_ACCEL
+	bool			rx_cpu_rmap_auto;
+#endif
 
 	/* priv_flags_slow, ungrouped to save space */
 	unsigned long		see_all_hwtstamp_requests:1;
@@ -2671,10 +2677,7 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
 			  enum netdev_queue_type type,
 			  struct napi_struct *napi);
 
-static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
-{
-	napi->irq = irq;
-}
+void netif_napi_set_irq(struct napi_struct *napi, int irq);
 
 /* Default NAPI poll() weight
  * Device drivers are strongly advised to not use bigger value
@@ -2765,6 +2768,10 @@ static inline void netif_napi_del(struct napi_struct *napi)
 	synchronize_net();
 }
 
+#ifdef CONFIG_RFS_ACCEL
+int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs);
+
+#endif
 struct packet_type {
 	__be16			type;	/* This is really htons(ether_type). */
 	bool			ignore_outgoing;
diff --git a/net/core/dev.c b/net/core/dev.c
index 1a90ed8cc6cc..3ee7a514dca8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6745,6 +6745,46 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
 }
 EXPORT_SYMBOL(netif_queue_set_napi);
 
+#ifdef CONFIG_RFS_ACCEL
+static void netif_disable_cpu_rmap(struct net_device *dev)
+{
+	free_irq_cpu_rmap(dev->rx_cpu_rmap);
+	dev->rx_cpu_rmap = NULL;
+	dev->rx_cpu_rmap_auto = false;
+}
+
+int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs)
+{
+	dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs);
+	if (!dev->rx_cpu_rmap)
+		return -ENOMEM;
+
+	dev->rx_cpu_rmap_auto = true;
+	return 0;
+}
+EXPORT_SYMBOL(netif_enable_cpu_rmap);
+#endif
+
+void netif_napi_set_irq(struct napi_struct *napi, int irq)
+{
+#ifdef CONFIG_RFS_ACCEL
+	int rc;
+#endif
+	napi->irq = irq;
+
+#ifdef CONFIG_RFS_ACCEL
+	if (napi->dev->rx_cpu_rmap && napi->dev->rx_cpu_rmap_auto) {
+		rc = irq_cpu_rmap_add(napi->dev->rx_cpu_rmap, irq);
+		if (rc) {
+			netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n",
+				    rc);
+			netif_disable_cpu_rmap(napi->dev);
+		}
+	}
+#endif
+}
+EXPORT_SYMBOL(netif_napi_set_irq);
+
 static void napi_restore_config(struct napi_struct *n)
 {
 	n->defer_hard_irqs = n->config->defer_hard_irqs;
@@ -11421,6 +11461,10 @@ void free_netdev(struct net_device *dev)
 	/* Flush device addresses */
 	dev_addr_flush(dev);
 
+#ifdef CONFIG_RFS_ACCEL
+	if (dev->rx_cpu_rmap && dev->rx_cpu_rmap_auto)
+		netif_disable_cpu_rmap(dev);
+#endif
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
Add a new netdev flag "rx_cpu_rmap_auto". Drivers supporting ARFS should
set the flag via netif_enable_cpu_rmap() and core will allocate and manage
the ARFS rmap. Freeing the rmap is also done by core when the netdev is
freed.

Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 38 ++---------------
 drivers/net/ethernet/broadcom/bnxt/bnxt.c    | 27 ++----------
 drivers/net/ethernet/intel/ice/ice_arfs.c    | 17 +-------
 include/linux/netdevice.h                    | 15 +++++--
 net/core/dev.c                               | 44 ++++++++++++++++++++
 5 files changed, 63 insertions(+), 78 deletions(-)
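For reference, after the review feedback in this thread (keeping the
CONFIG_RFS_ACCEL checks inside the core so drivers need no ifdefs, with
rx_cpu_rmap_auto no longer hidden behind the config option), the core
helpers would look roughly like the sketch below rather than the version
in the diff above. This is an illustrative next-revision shape, not the
posted code:

static void netif_disable_cpu_rmap(struct net_device *dev)
{
#ifdef CONFIG_RFS_ACCEL
	free_irq_cpu_rmap(dev->rx_cpu_rmap);
	dev->rx_cpu_rmap = NULL;
	dev->rx_cpu_rmap_auto = false;
#endif
}

int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs)
{
#ifdef CONFIG_RFS_ACCEL
	/* Core owns the rmap: allocated here, filled as NAPI IRQs are
	 * set, and freed from free_netdev().
	 */
	dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs);
	if (!dev->rx_cpu_rmap)
		return -ENOMEM;

	dev->rx_cpu_rmap_auto = true;
#endif
	return 0;
}
EXPORT_SYMBOL(netif_enable_cpu_rmap);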