@@ -427,8 +427,10 @@ rps_dev_flow_table. The stack consults a CPU to hardware queue map which
is maintained by the NIC driver. This is an auto-generated reverse map of
the IRQ affinity table shown by /proc/interrupts. Drivers can use
functions in the cpu_rmap (“CPU affinity reverse map”) kernel library
-to populate the map. For each CPU, the corresponding queue in the map is
-set to be one whose processing CPU is closest in cache locality.
+to populate the map. Alternatively, drivers can delegate the cpu_rmap
+management to the Kernel by calling netif_enable_cpu_rmap(). For each CPU,
+the corresponding queue in the map is set to be one whose processing CPU is
+closest in cache locality.
Accelerated RFS Configuration
@@ -5,9 +5,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#ifdef CONFIG_RFS_ACCEL
-#include <linux/cpu_rmap.h>
-#endif /* CONFIG_RFS_ACCEL */
#include <linux/ethtool.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -162,30 +159,6 @@ int ena_xmit_common(struct ena_adapter *adapter,
return 0;
}
-static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
-{
-#ifdef CONFIG_RFS_ACCEL
- u32 i;
- int rc;
-
- adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues);
- if (!adapter->netdev->rx_cpu_rmap)
- return -ENOMEM;
- for (i = 0; i < adapter->num_io_queues; i++) {
- int irq_idx = ENA_IO_IRQ_IDX(i);
-
- rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
- pci_irq_vector(adapter->pdev, irq_idx));
- if (rc) {
- free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
- adapter->netdev->rx_cpu_rmap = NULL;
- return rc;
- }
- }
-#endif /* CONFIG_RFS_ACCEL */
- return 0;
-}
-
static void ena_init_io_rings_common(struct ena_adapter *adapter,
struct ena_ring *ring, u16 qid)
{
@@ -1596,7 +1569,7 @@ static int ena_enable_msix(struct ena_adapter *adapter)
adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
}
- if (ena_init_rx_cpu_rmap(adapter))
+ if (netif_enable_cpu_rmap(adapter->netdev, adapter->num_io_queues))
netif_warn(adapter, probe, adapter->netdev,
"Failed to map IRQs to CPUs\n");
@@ -1742,13 +1715,6 @@ static void ena_free_io_irq(struct ena_adapter *adapter)
struct ena_irq *irq;
int i;
-#ifdef CONFIG_RFS_ACCEL
- if (adapter->msix_vecs >= 1) {
- free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
- adapter->netdev->rx_cpu_rmap = NULL;
- }
-#endif /* CONFIG_RFS_ACCEL */
-
for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
irq = &adapter->irq_tbl[i];
irq_set_affinity_hint(irq->vector, NULL);
@@ -4131,13 +4097,6 @@ static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
ena_dev = adapter->ena_dev;
netdev = adapter->netdev;
-#ifdef CONFIG_RFS_ACCEL
- if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
- free_irq_cpu_rmap(netdev->rx_cpu_rmap);
- netdev->rx_cpu_rmap = NULL;
- }
-
-#endif /* CONFIG_RFS_ACCEL */
/* Make sure timer and reset routine won't be called after
* freeing device resources.
*/
@@ -49,7 +49,6 @@
#include <linux/cache.h>
#include <linux/log2.h>
#include <linux/bitmap.h>
-#include <linux/cpu_rmap.h>
#include <linux/cpumask.h>
#include <net/pkt_cls.h>
#include <net/page_pool/helpers.h>
@@ -10886,10 +10885,8 @@ static int bnxt_set_real_num_queues(struct bnxt *bp)
if (rc)
return rc;
-#ifdef CONFIG_RFS_ACCEL
if (bp->flags & BNXT_FLAG_RFS)
- dev->rx_cpu_rmap = alloc_irq_cpu_rmap(bp->rx_nr_rings);
-#endif
+ return netif_enable_cpu_rmap(dev, bp->rx_nr_rings);
return rc;
}
@@ -11242,10 +11239,6 @@ static void bnxt_free_irq(struct bnxt *bp)
struct bnxt_irq *irq;
int i;
-#ifdef CONFIG_RFS_ACCEL
- free_irq_cpu_rmap(bp->dev->rx_cpu_rmap);
- bp->dev->rx_cpu_rmap = NULL;
-#endif
if (!bp->irq_tbl || !bp->bnapi)
return;
@@ -11268,11 +11261,8 @@ static void bnxt_free_irq(struct bnxt *bp)
static int bnxt_request_irq(struct bnxt *bp)
{
- int i, j, rc = 0;
+ int i, rc = 0;
unsigned long flags = 0;
-#ifdef CONFIG_RFS_ACCEL
- struct cpu_rmap *rmap;
-#endif
rc = bnxt_setup_int_mode(bp);
if (rc) {
@@ -11280,22 +11270,11 @@ static int bnxt_request_irq(struct bnxt *bp)
rc);
return rc;
}
-#ifdef CONFIG_RFS_ACCEL
- rmap = bp->dev->rx_cpu_rmap;
-#endif
- for (i = 0, j = 0; i < bp->cp_nr_rings; i++) {
+
+ for (i = 0; i < bp->cp_nr_rings; i++) {
int map_idx = bnxt_cp_num_to_irq_num(bp, i);
struct bnxt_irq *irq = &bp->irq_tbl[map_idx];
-#ifdef CONFIG_RFS_ACCEL
- if (rmap && bp->bnapi[i]->rx_ring) {
- rc = irq_cpu_rmap_add(rmap, irq->vector);
- if (rc)
- netdev_warn(bp->dev, "failed adding irq rmap for ring %d\n",
- j);
- j++;
- }
-#endif
rc = request_irq(irq->vector, irq->handler, flags, irq->name,
bp->bnapi[i]);
if (rc)
@@ -584,9 +584,6 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi)
netdev = vsi->netdev;
if (!netdev || !netdev->rx_cpu_rmap)
return;
-
- free_irq_cpu_rmap(netdev->rx_cpu_rmap);
- netdev->rx_cpu_rmap = NULL;
}
/**
@@ -597,7 +594,6 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
{
struct net_device *netdev;
struct ice_pf *pf;
- int i;
if (!vsi || vsi->type != ICE_VSI_PF)
return 0;
@@ -610,18 +606,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s, q_vectors %d\n",
vsi->type, netdev->name, vsi->num_q_vectors);
- netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors);
- if (unlikely(!netdev->rx_cpu_rmap))
- return -EINVAL;
-
- ice_for_each_q_vector(vsi, i)
- if (irq_cpu_rmap_add(netdev->rx_cpu_rmap,
- vsi->q_vectors[i]->irq.virq)) {
- ice_free_cpu_rx_rmap(vsi);
- return -EINVAL;
- }
-
- return 0;
+ return netif_enable_cpu_rmap(netdev, vsi->num_q_vectors);
}
/**
@@ -32,6 +32,7 @@ struct cpu_rmap {
#define CPU_RMAP_DIST_INF 0xffff
extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags);
+extern void cpu_rmap_get(struct cpu_rmap *rmap);
extern int cpu_rmap_put(struct cpu_rmap *rmap);
extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj);
@@ -394,6 +394,10 @@ struct napi_struct {
struct list_head dev_list;
struct hlist_node napi_hash_node;
int irq;
+#ifdef CONFIG_RFS_ACCEL
+ struct irq_affinity_notify notify;
+ int napi_rmap_idx;
+#endif
int index;
struct napi_config *config;
};
@@ -1988,6 +1992,9 @@ enum netdev_reg_state {
*
* @threaded: napi threaded mode is enabled
*
+ * @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap.
+ * Set by calling netif_enable_cpu_rmap().
+ *
* @see_all_hwtstamp_requests: device wants to see calls to
* ndo_hwtstamp_set() for all timestamp requests
* regardless of source, even if those aren't
@@ -2395,6 +2402,7 @@ struct net_device {
struct lock_class_key *qdisc_tx_busylock;
bool proto_down;
bool threaded;
+ bool rx_cpu_rmap_auto;
/* priv_flags_slow, ungrouped to save space */
unsigned long see_all_hwtstamp_requests:1;
@@ -2717,10 +2725,7 @@ static inline void netdev_assert_locked_or_invisible(struct net_device *dev)
netdev_assert_locked(dev);
}
-static inline void netif_napi_set_irq_locked(struct napi_struct *napi, int irq)
-{
- napi->irq = irq;
-}
+void netif_napi_set_irq_locked(struct napi_struct *napi, int irq);
static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
{
@@ -2858,6 +2863,8 @@ static inline void netif_napi_del(struct napi_struct *napi)
synchronize_net();
}
+int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs);
+
struct packet_type {
__be16 type; /* This is really htons(ether_type). */
bool ignore_outgoing;
@@ -73,7 +73,7 @@ static void cpu_rmap_release(struct kref *ref)
* cpu_rmap_get - internal helper to get new ref on a cpu_rmap
* @rmap: reverse-map allocated with alloc_cpu_rmap()
*/
-static inline void cpu_rmap_get(struct cpu_rmap *rmap)
+void cpu_rmap_get(struct cpu_rmap *rmap)
{
kref_get(&rmap->refcount);
}
@@ -6866,6 +6866,123 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
}
EXPORT_SYMBOL(netif_queue_set_napi);
+#ifdef CONFIG_RFS_ACCEL
+static void
+netif_irq_cpu_rmap_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ struct napi_struct *napi =
+ container_of(notify, struct napi_struct, notify);
+ struct cpu_rmap *rmap = napi->dev->rx_cpu_rmap;
+ int err;
+
+ err = cpu_rmap_update(rmap, napi->napi_rmap_idx, mask);
+ if (err)
+ netdev_warn(napi->dev, "RMAP update failed (%d)\n",
+ err);
+}
+
+static void netif_napi_affinity_release(struct kref *ref)
+{
+ struct napi_struct *napi =
+ container_of(ref, struct napi_struct, notify.kref);
+ struct cpu_rmap *rmap = napi->dev->rx_cpu_rmap;
+
+ rmap->obj[napi->napi_rmap_idx] = NULL;
+ napi->napi_rmap_idx = -1;
+ cpu_rmap_put(rmap);
+}
+
+static int napi_irq_cpu_rmap_add(struct napi_struct *napi, int irq)
+{
+ struct cpu_rmap *rmap = napi->dev->rx_cpu_rmap;
+ int rc;
+
+ napi->notify.notify = netif_irq_cpu_rmap_notify;
+ napi->notify.release = netif_napi_affinity_release;
+ cpu_rmap_get(rmap);
+ rc = cpu_rmap_add(rmap, napi);
+ if (rc < 0)
+ goto err_add;
+
+ napi->napi_rmap_idx = rc;
+ rc = irq_set_affinity_notifier(irq, &napi->notify);
+ if (rc)
+ goto err_set;
+
+ return 0;
+
+err_set:
+ rmap->obj[napi->napi_rmap_idx] = NULL;
+ napi->napi_rmap_idx = -1;
+err_add:
+ cpu_rmap_put(rmap);
+ return rc;
+}
+
+int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs)
+{
+ if (dev->rx_cpu_rmap_auto)
+ return 0;
+
+ dev->rx_cpu_rmap = alloc_irq_cpu_rmap(num_irqs);
+ if (!dev->rx_cpu_rmap)
+ return -ENOMEM;
+
+ dev->rx_cpu_rmap_auto = true;
+ return 0;
+}
+EXPORT_SYMBOL(netif_enable_cpu_rmap);
+
+static void netif_del_cpu_rmap(struct net_device *dev)
+{
+ struct cpu_rmap *rmap = dev->rx_cpu_rmap;
+
+ if (!dev->rx_cpu_rmap_auto)
+ return;
+
+ /* Free the rmap */
+ cpu_rmap_put(rmap);
+ dev->rx_cpu_rmap = NULL;
+ dev->rx_cpu_rmap_auto = false;
+}
+
+#else
+static int napi_irq_cpu_rmap_add(struct napi_struct *napi, int irq)
+{
+ return 0;
+}
+
+int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs)
+{
+ return 0;
+}
+EXPORT_SYMBOL(netif_enable_cpu_rmap);
+
+static void netif_del_cpu_rmap(struct net_device *dev)
+{
+}
+#endif
+
+void netif_napi_set_irq_locked(struct napi_struct *napi, int irq)
+{
+ int rc;
+
+ /* Remove existing rmap entries */
+ if (napi->dev->rx_cpu_rmap_auto &&
+ napi->irq != irq && napi->irq > 0)
+ irq_set_affinity_notifier(napi->irq, NULL);
+
+ napi->irq = irq;
+ if (irq > 0) {
+ rc = napi_irq_cpu_rmap_add(napi, irq);
+ if (rc)
+ netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n",
+ rc);
+ }
+}
+EXPORT_SYMBOL(netif_napi_set_irq_locked);
+
static void napi_restore_config(struct napi_struct *n)
{
n->defer_hard_irqs = n->config->defer_hard_irqs;
@@ -6995,6 +7112,9 @@ void napi_disable_locked(struct napi_struct *n)
else
napi_hash_del(n);
+ if (n->irq > 0 && n->dev->rx_cpu_rmap_auto)
+ irq_set_affinity_notifier(n->irq, NULL);
+
clear_bit(NAPI_STATE_DISABLE, &n->state);
}
EXPORT_SYMBOL(napi_disable_locked);
@@ -11610,6 +11730,8 @@ void free_netdev(struct net_device *dev)
netdev_napi_exit(dev);
+ netif_del_cpu_rmap(dev);
+
ref_tracker_dir_exit(&dev->refcnt_tracker);
#ifdef CONFIG_PCPU_DEV_REFCNT
free_percpu(dev->pcpu_refcnt);