@@ -98,7 +98,6 @@ struct_netdev_queue* _rx read_mostly
unsigned_int num_rx_queues
unsigned_int real_num_rx_queues - read_mostly get_rps_cpu
struct_bpf_prog* xdp_prog - read_mostly netif_elide_gro()
-unsigned_long gro_flush_timeout - read_mostly napi_complete_done
unsigned_int gro_max_size - read_mostly skb_gro_receive
unsigned_int gro_ipv4_max_size - read_mostly skb_gro_receive
rx_handler_func_t* rx_handler read_mostly - __netif_receive_skb_core
@@ -182,4 +181,5 @@ struct_devlink_port* devlink_port
struct_dpll_pin* dpll_pin
struct hlist_head page_pools
struct dim_irq_moder* irq_moder
+unsigned_long gro_flush_timeout
u32 napi_defer_hard_irqs
@@ -438,7 +438,7 @@ struct idpf_q_vector {
__cacheline_group_end_aligned(cold);
};
libeth_cacheline_set_assert(struct idpf_q_vector, 112,
- 424 + 2 * sizeof(struct dim),
+ 440 + 2 * sizeof(struct dim),
8 + sizeof(cpumask_var_t));
struct idpf_rx_queue_stats {
@@ -377,6 +377,7 @@ struct napi_struct {
struct list_head dev_list;
struct hlist_node napi_hash_node;
int irq;
+ unsigned long gro_flush_timeout;
u32 defer_hard_irqs;
};
@@ -2075,7 +2076,6 @@ struct net_device {
int ifindex;
unsigned int real_num_rx_queues;
struct netdev_rx_queue *_rx;
- unsigned long gro_flush_timeout;
unsigned int gro_max_size;
unsigned int gro_ipv4_max_size;
rx_handler_func_t __rcu *rx_handler;
@@ -2398,6 +2398,7 @@ struct net_device {
/** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */
struct dim_irq_moder *irq_moder;
+ unsigned long gro_flush_timeout;
u32 napi_defer_hard_irqs;
u8 priv[] ____cacheline_aligned
@@ -6226,12 +6226,12 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
if (work_done) {
if (n->gro_bitmask)
- timeout = READ_ONCE(n->dev->gro_flush_timeout);
+ timeout = napi_get_gro_flush_timeout(n);
n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n);
}
if (n->defer_hard_irqs_count > 0) {
n->defer_hard_irqs_count--;
- timeout = READ_ONCE(n->dev->gro_flush_timeout);
+ timeout = napi_get_gro_flush_timeout(n);
if (timeout)
ret = false;
}
@@ -6366,7 +6366,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
if (flags & NAPI_F_PREFER_BUSY_POLL) {
napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
- timeout = READ_ONCE(napi->dev->gro_flush_timeout);
+ timeout = napi_get_gro_flush_timeout(napi);
if (napi->defer_hard_irqs_count && timeout) {
hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
skip_schedule = true;
@@ -6648,6 +6648,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
napi->timer.function = napi_watchdog;
napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs));
+ napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout));
init_gro_hash(napi);
napi->skb = NULL;
INIT_LIST_HEAD(&napi->rx_list);
@@ -11053,7 +11054,7 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
WARN_ON(dev->reg_state == NETREG_REGISTERED);
if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
- dev->gro_flush_timeout = 20000;
+ netdev_set_gro_flush_timeout(dev, 20000);
netdev_set_defer_hard_irqs(dev, 1);
}
}
@@ -11991,7 +11992,6 @@ static void __init net_dev_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
- CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
@@ -12003,7 +12003,7 @@ static void __init net_dev_struct_check(void)
#ifdef CONFIG_NET_XGRESS
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
#endif
- CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 100);
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 92);
}
/*
@@ -174,6 +174,46 @@ static inline void netdev_set_defer_hard_irqs(struct net_device *netdev,
napi_set_defer_hard_irqs(napi, defer);
}
+/**
+ * napi_get_gro_flush_timeout - get the gro_flush_timeout
+ * @n: napi struct to get the gro_flush_timeout from
+ *
+ * Return: the per-NAPI value of the gro_flush_timeout field.
+ */
+static inline unsigned long
+napi_get_gro_flush_timeout(const struct napi_struct *n)
+{
+ return READ_ONCE(n->gro_flush_timeout);
+}
+
+/**
+ * napi_set_gro_flush_timeout - set the gro_flush_timeout for a napi
+ * @n: napi struct to set the gro_flush_timeout
+ * @timeout: timeout value to set
+ *
+ * napi_set_gro_flush_timeout sets the per-NAPI gro_flush_timeout
+ */
+static inline void napi_set_gro_flush_timeout(struct napi_struct *n,
+ unsigned long timeout)
+{
+ WRITE_ONCE(n->gro_flush_timeout, timeout);
+}
+
+/**
+ * netdev_set_gro_flush_timeout - set gro_flush_timeout of a netdev's NAPIs
+ * @netdev: the net_device for which all NAPIs will have gro_flush_timeout set
+ * @timeout: the timeout value to set
+ */
+static inline void netdev_set_gro_flush_timeout(struct net_device *netdev,
+ unsigned long timeout)
+{
+ struct napi_struct *napi;
+
+ WRITE_ONCE(netdev->gro_flush_timeout, timeout);
+ list_for_each_entry(napi, &netdev->napi_list, dev_list)
+ napi_set_gro_flush_timeout(napi, timeout);
+}
+
int rps_cpumask_housekeeping(struct cpumask *mask);
#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
@@ -409,7 +409,7 @@ NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);
static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
{
- WRITE_ONCE(dev->gro_flush_timeout, val);
+ netdev_set_gro_flush_timeout(dev, val);
return 0;
}
Allow per-NAPI gro_flush_timeout setting. The existing sysfs parameter is respected; writes to sysfs will write to all NAPI structs for the device and the net_device gro_flush_timeout field. Reads from sysfs will read from the net_device field. The ability to set gro_flush_timeout on specific NAPI instances will be added in a later commit, via netdev-genl. Note that idpf has embedded napi_struct in its internals and has established some series of asserts that involve the size of napi structure. Since this change increases the napi_struct size from 400 to 416 (according to pahole on my system), I've increased the assertion in idpf by 16 bytes. No attention whatsoever was paid to the cacheline placement of idpf internals as a result of this change. Signed-off-by: Joe Damato <jdamato@fastly.com> --- .../networking/net_cachelines/net_device.rst | 2 +- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 2 +- include/linux/netdevice.h | 3 +- net/core/dev.c | 12 +++--- net/core/dev.h | 40 +++++++++++++++++++ net/core/net-sysfs.c | 2 +- 6 files changed, 51 insertions(+), 10 deletions(-)