diff mbox series

[net-next,v5,1/6] net: move ARFS rmap management to core

Message ID 20250113171042.158123-2-ahmed.zaki@intel.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series net: napi: add CPU affinity to napi->config | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 40 this patch: 40
netdev/build_tools success Errors and warnings before: 0 (+1) this patch: 0 (+1)
netdev/cc_maintainers warning 2 maintainers not CCed: saeedb@amazon.com akiyano@amazon.com
netdev/build_clang success Errors and warnings before: 6616 this patch: 6616
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4097 this patch: 4097
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 254 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 105 this patch: 105
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2025-01-14--03-00 (tests: 885)

Commit Message

Ahmed Zaki Jan. 13, 2025, 5:10 p.m. UTC
Add a new netdev flag "rx_cpu_rmap_auto". Drivers supporting ARFS should
set the flag via netif_enable_cpu_rmap() and core will allocate and manage
the ARFS rmap. Freeing the rmap is also done by core when the netdev is
freed.

Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 38 ++---------------
 drivers/net/ethernet/broadcom/bnxt/bnxt.c    | 27 ++----------
 drivers/net/ethernet/intel/ice/ice_arfs.c    | 17 +-------
 include/linux/netdevice.h                    | 15 +++++--
 net/core/dev.c                               | 44 ++++++++++++++++++++
 5 files changed, 63 insertions(+), 78 deletions(-)

Comments

Arinzon, David Jan. 14, 2025, 9:33 a.m. UTC | #1
> Add a new netdev flag "rx_cpu_rmap_auto". Drivers supporting ARFS should
> set the flag via netif_enable_cpu_rmap() and core will allocate and manage
> the ARFS rmap. Freeing the rmap is also done by core when the netdev is
> freed.
> 
> Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
> ---
>  drivers/net/ethernet/amazon/ena/ena_netdev.c | 38 ++---------------
>  drivers/net/ethernet/broadcom/bnxt/bnxt.c    | 27 ++----------
>  drivers/net/ethernet/intel/ice/ice_arfs.c    | 17 +-------
>  include/linux/netdevice.h                    | 15 +++++--
>  net/core/dev.c                               | 44 ++++++++++++++++++++
>  5 files changed, 63 insertions(+), 78 deletions(-)
> 
> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> index c1295dfad0d0..a3fceaa83cd5 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> @@ -5,9 +5,6 @@
> 
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> 
> -#ifdef CONFIG_RFS_ACCEL
> -#include <linux/cpu_rmap.h>
> -#endif /* CONFIG_RFS_ACCEL */
>  #include <linux/ethtool.h>
>  #include <linux/kernel.h>
>  #include <linux/module.h>
> @@ -165,25 +162,10 @@ int ena_xmit_common(struct ena_adapter
> *adapter,  static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)  {
> #ifdef CONFIG_RFS_ACCEL
> -       u32 i;
> -       int rc;
> -
> -       adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter-
> >num_io_queues);
> -       if (!adapter->netdev->rx_cpu_rmap)
> -               return -ENOMEM;
> -       for (i = 0; i < adapter->num_io_queues; i++) {
> -               int irq_idx = ENA_IO_IRQ_IDX(i);
> -
> -               rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
> -                                     pci_irq_vector(adapter->pdev, irq_idx));
> -               if (rc) {
> -                       free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
> -                       adapter->netdev->rx_cpu_rmap = NULL;
> -                       return rc;
> -               }
> -       }
> -#endif /* CONFIG_RFS_ACCEL */
> +       return netif_enable_cpu_rmap(adapter->netdev,
> +adapter->num_io_queues); #else
>         return 0;
> +#endif /* CONFIG_RFS_ACCEL */
>  }
> 
>  static void ena_init_io_rings_common(struct ena_adapter *adapter, @@ -
> 1742,13 +1724,6 @@ static void ena_free_io_irq(struct ena_adapter
> *adapter)
>         struct ena_irq *irq;
>         int i;
> 
> -#ifdef CONFIG_RFS_ACCEL
> -       if (adapter->msix_vecs >= 1) {
> -               free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
> -               adapter->netdev->rx_cpu_rmap = NULL;
> -       }
> -#endif /* CONFIG_RFS_ACCEL */
> -
>         for (i = ENA_IO_IRQ_FIRST_IDX; i <
> ENA_MAX_MSIX_VEC(io_queue_count); i++) {
>                 irq = &adapter->irq_tbl[i];
>                 irq_set_affinity_hint(irq->vector, NULL); @@ -4131,13 +4106,6 @@
> static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
>         ena_dev = adapter->ena_dev;
>         netdev = adapter->netdev;
> 
> -#ifdef CONFIG_RFS_ACCEL
> -       if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
> -               free_irq_cpu_rmap(netdev->rx_cpu_rmap);
> -               netdev->rx_cpu_rmap = NULL;
> -       }
> -
> -#endif /* CONFIG_RFS_ACCEL */
>         /* Make sure timer and reset routine won't be called after
>          * freeing device resources.
>          */
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> index 884d42db5554..1f50bc715038 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> @@ -49,7 +49,6 @@
>  #include <linux/cache.h>
>  #include <linux/log2.h>
>  #include <linux/bitmap.h>
> -#include <linux/cpu_rmap.h>
>  #include <linux/cpumask.h>
>  #include <net/pkt_cls.h>
>  #include <net/page_pool/helpers.h>
> @@ -10861,7 +10860,7 @@ static int bnxt_set_real_num_queues(struct bnxt
> *bp)
> 
>  #ifdef CONFIG_RFS_ACCEL
>         if (bp->flags & BNXT_FLAG_RFS)
> -               dev->rx_cpu_rmap = alloc_irq_cpu_rmap(bp->rx_nr_rings);
> +               return netif_enable_cpu_rmap(dev, bp->rx_nr_rings);
>  #endif
> 
>         return rc;
> @@ -11215,10 +11214,6 @@ static void bnxt_free_irq(struct bnxt *bp)
>         struct bnxt_irq *irq;
>         int i;
> 
> -#ifdef CONFIG_RFS_ACCEL
> -       free_irq_cpu_rmap(bp->dev->rx_cpu_rmap);
> -       bp->dev->rx_cpu_rmap = NULL;
> -#endif
>         if (!bp->irq_tbl || !bp->bnapi)
>                 return;
> 
> @@ -11241,11 +11236,8 @@ static void bnxt_free_irq(struct bnxt *bp)
> 
>  static int bnxt_request_irq(struct bnxt *bp)  {
> -       int i, j, rc = 0;
> +       int i, rc = 0;
>         unsigned long flags = 0;
> -#ifdef CONFIG_RFS_ACCEL
> -       struct cpu_rmap *rmap;
> -#endif
> 
>         rc = bnxt_setup_int_mode(bp);
>         if (rc) {
> @@ -11253,22 +11245,11 @@ static int bnxt_request_irq(struct bnxt *bp)
>                            rc);
>                 return rc;
>         }
> -#ifdef CONFIG_RFS_ACCEL
> -       rmap = bp->dev->rx_cpu_rmap;
> -#endif
> -       for (i = 0, j = 0; i < bp->cp_nr_rings; i++) {
> +
> +       for (i = 0; i < bp->cp_nr_rings; i++) {
>                 int map_idx = bnxt_cp_num_to_irq_num(bp, i);
>                 struct bnxt_irq *irq = &bp->irq_tbl[map_idx];
> 
> -#ifdef CONFIG_RFS_ACCEL
> -               if (rmap && bp->bnapi[i]->rx_ring) {
> -                       rc = irq_cpu_rmap_add(rmap, irq->vector);
> -                       if (rc)
> -                               netdev_warn(bp->dev, "failed adding irq rmap for ring
> %d\n",
> -                                           j);
> -                       j++;
> -               }
> -#endif
>                 rc = request_irq(irq->vector, irq->handler, flags, irq->name,
>                                  bp->bnapi[i]);
>                 if (rc)
> diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c
> b/drivers/net/ethernet/intel/ice/ice_arfs.c
> index 7cee365cc7d1..3b1b892e6958 100644
> --- a/drivers/net/ethernet/intel/ice/ice_arfs.c
> +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
> @@ -584,9 +584,6 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi)
>         netdev = vsi->netdev;
>         if (!netdev || !netdev->rx_cpu_rmap)
>                 return;
> -
> -       free_irq_cpu_rmap(netdev->rx_cpu_rmap);
> -       netdev->rx_cpu_rmap = NULL;
>  }
> 
>  /**
> @@ -597,7 +594,6 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)  {
>         struct net_device *netdev;
>         struct ice_pf *pf;
> -       int i;
> 
>         if (!vsi || vsi->type != ICE_VSI_PF)
>                 return 0;
> @@ -610,18 +606,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
>         netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s,
> q_vectors %d\n",
>                    vsi->type, netdev->name, vsi->num_q_vectors);
> 
> -       netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors);
> -       if (unlikely(!netdev->rx_cpu_rmap))
> -               return -EINVAL;
> -
> -       ice_for_each_q_vector(vsi, i)
> -               if (irq_cpu_rmap_add(netdev->rx_cpu_rmap,
> -                                    vsi->q_vectors[i]->irq.virq)) {
> -                       ice_free_cpu_rx_rmap(vsi);
> -                       return -EINVAL;
> -               }
> -
> -       return 0;
> +       return netif_enable_cpu_rmap(netdev, vsi->num_q_vectors);
>  }
> 
>  /**
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index
> aeb4a6cff171..7e95e9ee36dd 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1991,6 +1991,9 @@ enum netdev_reg_state {
>   *
>   *     @threaded:      napi threaded mode is enabled
>   *
> + *     @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap.
> + *                        Set by calling netif_enable_cpu_rmap().
> + *
>   *     @see_all_hwtstamp_requests: device wants to see calls to
>   *                     ndo_hwtstamp_set() for all timestamp requests
>   *                     regardless of source, even if those aren't
> @@ -2398,6 +2401,9 @@ struct net_device {
>         struct lock_class_key   *qdisc_tx_busylock;
>         bool                    proto_down;
>         bool                    threaded;
> +#ifdef CONFIG_RFS_ACCEL
> +       bool                    rx_cpu_rmap_auto;
> +#endif
> 
>         /* priv_flags_slow, ungrouped to save space */
>         unsigned long           see_all_hwtstamp_requests:1;
> @@ -2671,10 +2677,7 @@ void netif_queue_set_napi(struct net_device
> *dev, unsigned int queue_index,
>                           enum netdev_queue_type type,
>                           struct napi_struct *napi);
> 
> -static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) -{
> -       napi->irq = irq;
> -}
> +void netif_napi_set_irq(struct napi_struct *napi, int irq);
> 
>  /* Default NAPI poll() weight
>   * Device drivers are strongly advised to not use bigger value @@ -2765,6
> +2768,10 @@ static inline void netif_napi_del(struct napi_struct *napi)
>         synchronize_net();
>  }
> 
> +#ifdef CONFIG_RFS_ACCEL
> +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int
> +num_irqs);
> +
> +#endif
>  struct packet_type {
>         __be16                  type;   /* This is really htons(ether_type). */
>         bool                    ignore_outgoing;
> diff --git a/net/core/dev.c b/net/core/dev.c index
> 1a90ed8cc6cc..3ee7a514dca8 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6745,6 +6745,46 @@ void netif_queue_set_napi(struct net_device
> *dev, unsigned int queue_index,  }
> EXPORT_SYMBOL(netif_queue_set_napi);
> 
> +#ifdef CONFIG_RFS_ACCEL
> +static void netif_disable_cpu_rmap(struct net_device *dev) {
> +       free_irq_cpu_rmap(dev->rx_cpu_rmap);
> +       dev->rx_cpu_rmap = NULL;
> +       dev->rx_cpu_rmap_auto = false;
> +}
> +
> +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int
> +num_irqs) {
> +       dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs);
> +       if (!dev->rx_cpu_rmap)
> +               return -ENOMEM;
> +
> +       dev->rx_cpu_rmap_auto = true;
> +       return 0;
> +}
> +EXPORT_SYMBOL(netif_enable_cpu_rmap);
> +#endif
> +
> +void netif_napi_set_irq(struct napi_struct *napi, int irq) { #ifdef
> +CONFIG_RFS_ACCEL
> +       int rc;
> +#endif
> +       napi->irq = irq;
> +
> +#ifdef CONFIG_RFS_ACCEL
> +       if (napi->dev->rx_cpu_rmap && napi->dev->rx_cpu_rmap_auto) {
> +               rc = irq_cpu_rmap_add(napi->dev->rx_cpu_rmap, irq);
> +               if (rc) {
> +                       netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n",
> +                                   rc);
> +                       netif_disable_cpu_rmap(napi->dev);
> +               }
> +       }
> +#endif
> +}
> +EXPORT_SYMBOL(netif_napi_set_irq);
> +
>  static void napi_restore_config(struct napi_struct *n)  {
>         n->defer_hard_irqs = n->config->defer_hard_irqs; @@ -11421,6
> +11461,10 @@ void free_netdev(struct net_device *dev)
>         /* Flush device addresses */
>         dev_addr_flush(dev);
> 
> +#ifdef CONFIG_RFS_ACCEL
> +       if (dev->rx_cpu_rmap && dev->rx_cpu_rmap_auto)
> +               netif_disable_cpu_rmap(dev); #endif
>         list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
>                 netif_napi_del(p);
> 
> --
> 2.43.0

Thanks for making the change in the ENA driver.

Acked-by: David Arinzon <darinzon@amazon.com>
Jakub Kicinski Jan. 14, 2025, 10:08 p.m. UTC | #2
On Mon, 13 Jan 2025 10:10:37 -0700 Ahmed Zaki wrote:
> -#endif /* CONFIG_RFS_ACCEL */
> +	return netif_enable_cpu_rmap(adapter->netdev, adapter->num_io_queues);
> +#else
>  	return 0;
> +#endif /* CONFIG_RFS_ACCEL */

Let's try to eliminate some of the ifdef-ery on the driver side.
netif_enable_cpu_rmap() should simply do nothing if !CONFIG_RFS_ACCEL

> @@ -2398,6 +2401,9 @@ struct net_device {
> 	struct lock_class_key	*qdisc_tx_busylock;
> 	bool			proto_down;
> 	bool			threaded;
> +#ifdef CONFIG_RFS_ACCEL
> +	bool			rx_cpu_rmap_auto;
> +#endif

similar point, don't hide it, it's just one byte and we can just leave
it as false if !CONFIG_RFS_ACCEL. It can save us a bunch of other ifdefs

> +#ifdef CONFIG_RFS_ACCEL
> +static void netif_disable_cpu_rmap(struct net_device *dev)
> +{
> +	free_irq_cpu_rmap(dev->rx_cpu_rmap);
> +	dev->rx_cpu_rmap = NULL;
> +	dev->rx_cpu_rmap_auto = false;
> +}

Better do do:

static void netif_disable_cpu_rmap(struct net_device *dev)
{
#ifdef CONFIG_RFS_ACCEL
	free_irq_cpu_rmap(dev->rx_cpu_rmap);
	dev->rx_cpu_rmap = NULL;
	dev->rx_cpu_rmap_auto = false;
#endif
}

IOW if not relevant the function should do nothing

> +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs)
> +{
> +	dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs);
> +	if (!dev->rx_cpu_rmap)
> +		return -ENOMEM;
> +
> +	dev->rx_cpu_rmap_auto = true;
> +	return 0;
> +}
> +EXPORT_SYMBOL(netif_enable_cpu_rmap);

here you can depend on dead code elimination:

int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs)
{
	if (!IS_ENABLED(CONFIG_RFS_ACCEL))
		return 0;

	...
}

> +#endif
> +
> +void netif_napi_set_irq(struct napi_struct *napi, int irq)
> +{
> +#ifdef CONFIG_RFS_ACCEL
> +	int rc;
> +#endif
> +	napi->irq = irq;
> +
> +#ifdef CONFIG_RFS_ACCEL
> +	if (napi->dev->rx_cpu_rmap && napi->dev->rx_cpu_rmap_auto) {
> +		rc = irq_cpu_rmap_add(napi->dev->rx_cpu_rmap, irq);
> +		if (rc) {
> +			netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n",
> +				    rc);
> +			netif_disable_cpu_rmap(napi->dev);
> +		}
> +	}
> +#endif

Declare rc inside the if to avoid the extra ifdef on variable decl

> +}
> +EXPORT_SYMBOL(netif_napi_set_irq);
> +
>  static void napi_restore_config(struct napi_struct *n)
>  {
>  	n->defer_hard_irqs = n->config->defer_hard_irqs;
> @@ -11421,6 +11461,10 @@ void free_netdev(struct net_device *dev)
>  	/* Flush device addresses */
>  	dev_addr_flush(dev);
>  
> +#ifdef CONFIG_RFS_ACCEL
> +	if (dev->rx_cpu_rmap && dev->rx_cpu_rmap_auto)

don't check dev->rx_cpu_rmap, dev->rx_cpu_rmap_auto is enough

> +		netif_disable_cpu_rmap(dev);
> +#endif
>  	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
>  		netif_napi_del(p);
>  

IRQs are often allocated in ndo_open and freed in ndo_stop, so
you need to catch netif_napi_del or napi_disable and remove
the IRQ from the map.

Similarly netif_napi_set_irq() may get called with -1 to clear
the IRQ number, which you currently treat at a real IRQ id, AFAICT.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index c1295dfad0d0..a3fceaa83cd5 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -5,9 +5,6 @@ 
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#ifdef CONFIG_RFS_ACCEL
-#include <linux/cpu_rmap.h>
-#endif /* CONFIG_RFS_ACCEL */
 #include <linux/ethtool.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -165,25 +162,10 @@  int ena_xmit_common(struct ena_adapter *adapter,
 static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
 {
 #ifdef CONFIG_RFS_ACCEL
-	u32 i;
-	int rc;
-
-	adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues);
-	if (!adapter->netdev->rx_cpu_rmap)
-		return -ENOMEM;
-	for (i = 0; i < adapter->num_io_queues; i++) {
-		int irq_idx = ENA_IO_IRQ_IDX(i);
-
-		rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
-				      pci_irq_vector(adapter->pdev, irq_idx));
-		if (rc) {
-			free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
-			adapter->netdev->rx_cpu_rmap = NULL;
-			return rc;
-		}
-	}
-#endif /* CONFIG_RFS_ACCEL */
+	return netif_enable_cpu_rmap(adapter->netdev, adapter->num_io_queues);
+#else
 	return 0;
+#endif /* CONFIG_RFS_ACCEL */
 }
 
 static void ena_init_io_rings_common(struct ena_adapter *adapter,
@@ -1742,13 +1724,6 @@  static void ena_free_io_irq(struct ena_adapter *adapter)
 	struct ena_irq *irq;
 	int i;
 
-#ifdef CONFIG_RFS_ACCEL
-	if (adapter->msix_vecs >= 1) {
-		free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
-		adapter->netdev->rx_cpu_rmap = NULL;
-	}
-#endif /* CONFIG_RFS_ACCEL */
-
 	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
 		irq = &adapter->irq_tbl[i];
 		irq_set_affinity_hint(irq->vector, NULL);
@@ -4131,13 +4106,6 @@  static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
 	ena_dev = adapter->ena_dev;
 	netdev = adapter->netdev;
 
-#ifdef CONFIG_RFS_ACCEL
-	if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
-		free_irq_cpu_rmap(netdev->rx_cpu_rmap);
-		netdev->rx_cpu_rmap = NULL;
-	}
-
-#endif /* CONFIG_RFS_ACCEL */
 	/* Make sure timer and reset routine won't be called after
 	 * freeing device resources.
 	 */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 884d42db5554..1f50bc715038 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -49,7 +49,6 @@ 
 #include <linux/cache.h>
 #include <linux/log2.h>
 #include <linux/bitmap.h>
-#include <linux/cpu_rmap.h>
 #include <linux/cpumask.h>
 #include <net/pkt_cls.h>
 #include <net/page_pool/helpers.h>
@@ -10861,7 +10860,7 @@  static int bnxt_set_real_num_queues(struct bnxt *bp)
 
 #ifdef CONFIG_RFS_ACCEL
 	if (bp->flags & BNXT_FLAG_RFS)
-		dev->rx_cpu_rmap = alloc_irq_cpu_rmap(bp->rx_nr_rings);
+		return netif_enable_cpu_rmap(dev, bp->rx_nr_rings);
 #endif
 
 	return rc;
@@ -11215,10 +11214,6 @@  static void bnxt_free_irq(struct bnxt *bp)
 	struct bnxt_irq *irq;
 	int i;
 
-#ifdef CONFIG_RFS_ACCEL
-	free_irq_cpu_rmap(bp->dev->rx_cpu_rmap);
-	bp->dev->rx_cpu_rmap = NULL;
-#endif
 	if (!bp->irq_tbl || !bp->bnapi)
 		return;
 
@@ -11241,11 +11236,8 @@  static void bnxt_free_irq(struct bnxt *bp)
 
 static int bnxt_request_irq(struct bnxt *bp)
 {
-	int i, j, rc = 0;
+	int i, rc = 0;
 	unsigned long flags = 0;
-#ifdef CONFIG_RFS_ACCEL
-	struct cpu_rmap *rmap;
-#endif
 
 	rc = bnxt_setup_int_mode(bp);
 	if (rc) {
@@ -11253,22 +11245,11 @@  static int bnxt_request_irq(struct bnxt *bp)
 			   rc);
 		return rc;
 	}
-#ifdef CONFIG_RFS_ACCEL
-	rmap = bp->dev->rx_cpu_rmap;
-#endif
-	for (i = 0, j = 0; i < bp->cp_nr_rings; i++) {
+
+	for (i = 0; i < bp->cp_nr_rings; i++) {
 		int map_idx = bnxt_cp_num_to_irq_num(bp, i);
 		struct bnxt_irq *irq = &bp->irq_tbl[map_idx];
 
-#ifdef CONFIG_RFS_ACCEL
-		if (rmap && bp->bnapi[i]->rx_ring) {
-			rc = irq_cpu_rmap_add(rmap, irq->vector);
-			if (rc)
-				netdev_warn(bp->dev, "failed adding irq rmap for ring %d\n",
-					    j);
-			j++;
-		}
-#endif
 		rc = request_irq(irq->vector, irq->handler, flags, irq->name,
 				 bp->bnapi[i]);
 		if (rc)
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c
index 7cee365cc7d1..3b1b892e6958 100644
--- a/drivers/net/ethernet/intel/ice/ice_arfs.c
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
@@ -584,9 +584,6 @@  void ice_free_cpu_rx_rmap(struct ice_vsi *vsi)
 	netdev = vsi->netdev;
 	if (!netdev || !netdev->rx_cpu_rmap)
 		return;
-
-	free_irq_cpu_rmap(netdev->rx_cpu_rmap);
-	netdev->rx_cpu_rmap = NULL;
 }
 
 /**
@@ -597,7 +594,6 @@  int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
 {
 	struct net_device *netdev;
 	struct ice_pf *pf;
-	int i;
 
 	if (!vsi || vsi->type != ICE_VSI_PF)
 		return 0;
@@ -610,18 +606,7 @@  int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
 	netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s, q_vectors %d\n",
 		   vsi->type, netdev->name, vsi->num_q_vectors);
 
-	netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors);
-	if (unlikely(!netdev->rx_cpu_rmap))
-		return -EINVAL;
-
-	ice_for_each_q_vector(vsi, i)
-		if (irq_cpu_rmap_add(netdev->rx_cpu_rmap,
-				     vsi->q_vectors[i]->irq.virq)) {
-			ice_free_cpu_rx_rmap(vsi);
-			return -EINVAL;
-		}
-
-	return 0;
+	return netif_enable_cpu_rmap(netdev, vsi->num_q_vectors);
 }
 
 /**
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index aeb4a6cff171..7e95e9ee36dd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1991,6 +1991,9 @@  enum netdev_reg_state {
  *
  *	@threaded:	napi threaded mode is enabled
  *
+ *	@rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap.
+ *	                   Set by calling netif_enable_cpu_rmap().
+ *
  *	@see_all_hwtstamp_requests: device wants to see calls to
  *			ndo_hwtstamp_set() for all timestamp requests
  *			regardless of source, even if those aren't
@@ -2398,6 +2401,9 @@  struct net_device {
 	struct lock_class_key	*qdisc_tx_busylock;
 	bool			proto_down;
 	bool			threaded;
+#ifdef CONFIG_RFS_ACCEL
+	bool			rx_cpu_rmap_auto;
+#endif
 
 	/* priv_flags_slow, ungrouped to save space */
 	unsigned long		see_all_hwtstamp_requests:1;
@@ -2671,10 +2677,7 @@  void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
 			  enum netdev_queue_type type,
 			  struct napi_struct *napi);
 
-static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
-{
-	napi->irq = irq;
-}
+void netif_napi_set_irq(struct napi_struct *napi, int irq);
 
 /* Default NAPI poll() weight
  * Device drivers are strongly advised to not use bigger value
@@ -2765,6 +2768,10 @@  static inline void netif_napi_del(struct napi_struct *napi)
 	synchronize_net();
 }
 
+#ifdef CONFIG_RFS_ACCEL
+int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs);
+
+#endif
 struct packet_type {
 	__be16			type;	/* This is really htons(ether_type). */
 	bool			ignore_outgoing;
diff --git a/net/core/dev.c b/net/core/dev.c
index 1a90ed8cc6cc..3ee7a514dca8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6745,6 +6745,46 @@  void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
 }
 EXPORT_SYMBOL(netif_queue_set_napi);
 
+#ifdef CONFIG_RFS_ACCEL
+static void netif_disable_cpu_rmap(struct net_device *dev)
+{
+	free_irq_cpu_rmap(dev->rx_cpu_rmap);
+	dev->rx_cpu_rmap = NULL;
+	dev->rx_cpu_rmap_auto = false;
+}
+
+int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs)
+{
+	dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs);
+	if (!dev->rx_cpu_rmap)
+		return -ENOMEM;
+
+	dev->rx_cpu_rmap_auto = true;
+	return 0;
+}
+EXPORT_SYMBOL(netif_enable_cpu_rmap);
+#endif
+
+void netif_napi_set_irq(struct napi_struct *napi, int irq)
+{
+#ifdef CONFIG_RFS_ACCEL
+	int rc;
+#endif
+	napi->irq = irq;
+
+#ifdef CONFIG_RFS_ACCEL
+	if (napi->dev->rx_cpu_rmap && napi->dev->rx_cpu_rmap_auto) {
+		rc = irq_cpu_rmap_add(napi->dev->rx_cpu_rmap, irq);
+		if (rc) {
+			netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n",
+				    rc);
+			netif_disable_cpu_rmap(napi->dev);
+		}
+	}
+#endif
+}
+EXPORT_SYMBOL(netif_napi_set_irq);
+
 static void napi_restore_config(struct napi_struct *n)
 {
 	n->defer_hard_irqs = n->config->defer_hard_irqs;
@@ -11421,6 +11461,10 @@  void free_netdev(struct net_device *dev)
 	/* Flush device addresses */
 	dev_addr_flush(dev);
 
+#ifdef CONFIG_RFS_ACCEL
+	if (dev->rx_cpu_rmap && dev->rx_cpu_rmap_auto)
+		netif_disable_cpu_rmap(dev);
+#endif
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);