@@ -6713,13 +6713,14 @@ static void napi_restore_config(struct napi_struct *n)
 	n->gro_flush_timeout = n->config->gro_flush_timeout;
 	n->irq_suspend_timeout = n->config->irq_suspend_timeout;
 	/* a NAPI ID might be stored in the config, if so use it. if not, use
-	 * napi_hash_add to generate one for us. It will be saved to the config
-	 * in napi_disable.
+	 * napi_hash_add to generate one for us.
 	 */
-	if (n->config->napi_id)
+	if (n->config->napi_id) {
 		napi_hash_add_with_id(n, n->config->napi_id);
-	else
+	} else {
 		napi_hash_add(n);
+		n->config->napi_id = n->napi_id;
+	}
 }
 
 static void napi_save_config(struct napi_struct *n)
@@ -6727,10 +6728,39 @@ static void napi_save_config(struct napi_struct *n)
 	n->config->defer_hard_irqs = n->defer_hard_irqs;
 	n->config->gro_flush_timeout = n->gro_flush_timeout;
 	n->config->irq_suspend_timeout = n->irq_suspend_timeout;
-	n->config->napi_id = n->napi_id;
 	napi_hash_del(n);
 }
 
+/* Netlink wants the NAPI list to be sorted by ID, if adding a NAPI which will
+ * inherit an existing ID try to insert it at the right position.
+ */
+static void
+netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
+{
+	unsigned int new_id, pos_id;
+	struct list_head *higher;
+	struct napi_struct *pos;
+
+	new_id = UINT_MAX;
+	if (napi->config && napi->config->napi_id)
+		new_id = napi->config->napi_id;
+
+	higher = &dev->napi_list;
+	list_for_each_entry(pos, &dev->napi_list, dev_list) {
+		if (pos->napi_id >= MIN_NAPI_ID)
+			pos_id = pos->napi_id;
+		else if (pos->config)
+			pos_id = pos->config->napi_id;
+		else
+			pos_id = UINT_MAX;
+
+		if (pos_id <= new_id)
+			break;
+		higher = &pos->dev_list;
+	}
+	list_add_rcu(&napi->dev_list, higher); /* adds after higher */
+}
+
 void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
 			   int (*poll)(struct napi_struct *, int), int weight)
 {
@@ -6757,7 +6787,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
 	napi->list_owner = -1;
 	set_bit(NAPI_STATE_SCHED, &napi->state);
 	set_bit(NAPI_STATE_NPSVC, &napi->state);
-	list_add_rcu(&napi->dev_list, &dev->napi_list);
+	netif_napi_dev_list_add(dev, napi);
 
 	/* default settings from sysfs are applied to all NAPIs. any per-NAPI
 	 * configuration will be loaded in napi_enable
Netlink code depends on NAPI instances being sorted by ID on the netdev
list for dump continuation. We need to be able to find the position on
the list where we left off if a dump does not fit in a single skb, and
in the meantime NAPI instances can come and go.

This was trivially true when we were assigning a new ID to every new
NAPI instance. Since we added the NAPI config API, we try to retain the
ID previously used for the same queue, but still add the new NAPI
instance at the start of the list.

This is fine if we reset the entire netdev, as all NAPIs get removed
and added back. If a driver replaces a NAPI instance during an
operation like a DEVMEM queue reset, or recreates a subset of NAPI
instances in other ways, we may end up with broken ordering, and
therefore Netlink dumps with missing or duplicated entries.

At this stage the problem is theoretical. Only two drivers support the
queue API, bnxt and gve. gve recreates NAPIs during queue reset, but it
doesn't support NAPI config. bnxt supports NAPI config but doesn't
recreate instances during reset.

We need to save the ID in the config as soon as it is assigned, because
otherwise the new NAPI would not know what ID it will get at enable
time, at the time it is being added.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/dev.c | 42 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 6 deletions(-)
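
In case the ordering argument is easier to follow with something you can
compile and run, here is a minimal userspace sketch of the walk
netif_napi_dev_list_add() performs. The list helpers and struct toy_napi
are simplified stand-ins invented for the sketch (not the kernel's
<linux/list.h> or struct napi_struct); inherited_id plays the role of
napi->config->napi_id, with 0 meaning "no ID assigned yet":

#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's <linux/list.h>; list_add()
 * inserts the new node right after @head, like its kernel namesake.
 */
struct list_head { struct list_head *next, *prev; };

static void list_head_init(struct list_head *h)
{
	h->next = h->prev = h;
}

static void list_add(struct list_head *node, struct list_head *head)
{
	node->prev = head;
	node->next = head->next;
	head->next->prev = node;
	head->next = node;
}

/* Toy NAPI, not the kernel struct: inherited_id stands in for
 * napi->config->napi_id, 0 means "no ID assigned yet".
 */
struct toy_napi {
	unsigned int inherited_id;
	struct list_head dev_list;
};

#define napi_of(p) \
	((struct toy_napi *)((char *)(p) - offsetof(struct toy_napi, dev_list)))

/* The same walk netif_napi_dev_list_add() does: remember the last entry
 * whose ID is strictly higher than ours and insert right after it.
 * Entries with no ID sort as UINT_MAX (to the front), since a freshly
 * generated ID is higher than every existing one.
 */
static void sorted_list_add(struct list_head *napi_list, struct toy_napi *napi)
{
	unsigned int new_id = napi->inherited_id ? napi->inherited_id : ~0U;
	struct list_head *higher = napi_list, *p;

	for (p = napi_list->next; p != napi_list; p = p->next) {
		unsigned int pos_id = napi_of(p)->inherited_id;

		if (!pos_id)
			pos_id = ~0U;
		if (pos_id <= new_id)
			break;
		higher = p;
	}
	list_add(&napi->dev_list, higher); /* adds after higher */
}

int main(void)
{
	struct toy_napi n[] = {
		{ .inherited_id = 0x101 },
		{ .inherited_id = 0x103 },
		{ .inherited_id = 0x102 },
		{ .inherited_id = 0 },	/* brand new queue, no ID yet */
	};
	struct list_head napi_list, *p;
	size_t i;

	list_head_init(&napi_list);
	for (i = 0; i < sizeof(n) / sizeof(n[0]); i++)
		sorted_list_add(&napi_list, &n[i]);

	/* Prints "(new) 0x103 0x102 0x101": descending, new IDs first */
	for (p = napi_list.next; p != &napi_list; p = p->next) {
		if (napi_of(p)->inherited_id)
			printf("0x%x ", napi_of(p)->inherited_id);
		else
			printf("(new) ");
	}
	printf("\n");
	return 0;
}

Note how the entries are inserted out of order (and one with no ID yet)
but the list always comes out descending by ID, which is the invariant
the dump code needs.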
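
And a toy model of why the sorting matters for dump continuation.
dump_chunk() is not the actual netlink code, just the threshold-style
resume described above (skip everything at or above the last dumped ID),
and the ID values are made up. With sorted insertion a NAPI recreated
between chunks keeps its place; with the old add-at-the-head behavior
the threshold can drop below an entry that was never dumped, so 0x103
goes missing:

#include <stdio.h>

#define BUDGET 2	/* pretend only two NAPI entries fit per skb */

/* One dump pass: emit up to BUDGET entries whose ID is below resume_id,
 * lowering resume_id to each ID as it is emitted. Visiting every entry
 * exactly once across passes relies on the list descending by ID.
 */
static unsigned int dump_chunk(const unsigned int *ids, int n,
			       unsigned int resume_id)
{
	int i, used = 0;

	for (i = 0; i < n && used < BUDGET; i++) {
		if (ids[i] >= resume_id)
			continue;	/* treated as already dumped */
		printf("  dump napi 0x%x\n", ids[i]);
		resume_id = ids[i];
		used++;
	}
	return resume_id;
}

int main(void)
{
	unsigned int start[]  = { 0x105, 0x104, 0x103, 0x102, 0x101 };
	/* A queue reset recreates the NAPI with inherited ID 0x102
	 * between chunks; sorted insertion puts it back where it was...
	 */
	unsigned int sorted[] = { 0x105, 0x104, 0x103, 0x102, 0x101 };
	/* ...while the old add-at-the-head behavior would not: */
	unsigned int broken[] = { 0x102, 0x105, 0x104, 0x103, 0x101 };
	unsigned int resume;

	printf("sorted insertion:\n");
	resume = dump_chunk(start, 5, ~0U);	/* 0x105, 0x104 */
	resume = dump_chunk(sorted, 5, resume);	/* 0x103, 0x102 */
	dump_chunk(sorted, 5, resume);		/* 0x101: all five dumped */

	printf("head insertion:\n");
	resume = dump_chunk(start, 5, ~0U);	/* 0x105, 0x104 */
	resume = dump_chunk(broken, 5, resume);	/* 0x102, then 0x101:
						 * 0x103 gets skipped */
	dump_chunk(broken, 5, resume);		/* nothing below 0x101;
						 * 0x103 was never dumped */
	return 0;
}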