Message ID | 20220516042456.3014395-4-eric.dumazet@gmail.com |
---|---|
State | Accepted |
Commit | 39564c3fdc6684c6726b63e131d2a9f3809811cb |
Delegated to: | Netdev Maintainers |
Series | net: polish skb defer freeing |
On Sun, 15 May 2022 21:24:55 -0700 Eric Dumazet wrote:
> @@ -6494,16 +6495,21 @@ void skb_attempt_defer_free(struct sk_buff *skb)
>  	int cpu = skb->alloc_cpu;
>  	struct softnet_data *sd;
>  	unsigned long flags;
> +	unsigned int defer_max;
>  	bool kick;
>  
>  	if (WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
>  	    !cpu_online(cpu) ||
>  	    cpu == raw_smp_processor_id()) {
> -		__kfree_skb(skb);
> +nodefer:	__kfree_skb(skb);
>  		return;
>  	}
>  
>  	sd = &per_cpu(softnet_data, cpu);
> +	defer_max = READ_ONCE(sysctl_skb_defer_max);
> +	if (READ_ONCE(sd->defer_count) >= defer_max)
> +		goto nodefer;
> +
>  	/* We do not send an IPI or any signal.
>  	 * Remote cpu will eventually call skb_defer_free_flush()
>  	 */
> @@ -6513,11 +6519,8 @@ void skb_attempt_defer_free(struct sk_buff *skb)
>  	WRITE_ONCE(sd->defer_list, skb);
>  	sd->defer_count++;
>  
> -	/* kick every time queue length reaches 128.
> -	 * This condition should hardly be hit under normal conditions,
> -	 * unless cpu suddenly stopped to receive NIC interrupts.
> -	 */
> -	kick = sd->defer_count == 128;
> +	/* Send an IPI every time queue reaches half capacity. */
> +	kick = sd->defer_count == (defer_max >> 1);

nit: it will behave a little strangely for defer_max == 1
we'll let one skb get onto the list and free the subsequent
skbs directly but we'll never kick the IPI

Moving the sd->defer_count++; should fix it and have no significant
side effects. I think.
On Mon, May 16, 2022 at 1:39 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Sun, 15 May 2022 21:24:55 -0700 Eric Dumazet wrote:
> > @@ -6494,16 +6495,21 @@ void skb_attempt_defer_free(struct sk_buff *skb)
> >  	int cpu = skb->alloc_cpu;
> >  	struct softnet_data *sd;
> >  	unsigned long flags;
> > +	unsigned int defer_max;
> >  	bool kick;
> >  
> >  	if (WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
> >  	    !cpu_online(cpu) ||
> >  	    cpu == raw_smp_processor_id()) {
> > -		__kfree_skb(skb);
> > +nodefer:	__kfree_skb(skb);
> >  		return;
> >  	}
> >  
> >  	sd = &per_cpu(softnet_data, cpu);
> > +	defer_max = READ_ONCE(sysctl_skb_defer_max);
> > +	if (READ_ONCE(sd->defer_count) >= defer_max)
> > +		goto nodefer;
> > +
> >  	/* We do not send an IPI or any signal.
> >  	 * Remote cpu will eventually call skb_defer_free_flush()
> >  	 */
> > @@ -6513,11 +6519,8 @@ void skb_attempt_defer_free(struct sk_buff *skb)
> >  	WRITE_ONCE(sd->defer_list, skb);
> >  	sd->defer_count++;
> >  
> > -	/* kick every time queue length reaches 128.
> > -	 * This condition should hardly be hit under normal conditions,
> > -	 * unless cpu suddenly stopped to receive NIC interrupts.
> > -	 */
> > -	kick = sd->defer_count == 128;
> > +	/* Send an IPI every time queue reaches half capacity. */
> > +	kick = sd->defer_count == (defer_max >> 1);
>
> nit: it will behave a little strangely for defer_max == 1
> we'll let one skb get onto the list and free the subsequent
> skbs directly but we'll never kick the IPI

Yes, I was aware of this, but decided it was not a big deal.

Presumably people will be interested to disable the thing completely,
I am not sure about defer_max == 1

>
> Moving the sd->defer_count++; should fix it and have no significant
> side effects. I think.

SGTM, thanks !
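The corner case Jakub describes is easy to check with a small userspace model of the two orderings. This is only an illustrative sketch, not kernel code: simulate(), its fixed loop bound, and the printed labels are all invented here, with the kick test copied from the patch.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the kick decision in skb_attempt_defer_free():
 * walk a stream of skbs destined for one remote cpu and report
 * which are deferred and whether the IPI "kick" would fire.
 * increment_first mimics the posted patch (sd->defer_count++
 * before the kick test); the alternative computes kick from the
 * pre-increment count, as the review suggests.
 */
static void simulate(unsigned int defer_max, bool increment_first)
{
	unsigned int defer_count = 0;

	for (int skb = 1; skb <= 4; skb++) {
		bool kick;

		if (defer_count >= defer_max) {
			/* nodefer path: skb is freed directly */
			printf("  skb %d: freed directly\n", skb);
			continue;
		}
		if (increment_first) {
			defer_count++;
			kick = defer_count == (defer_max >> 1);
		} else {
			kick = defer_count == (defer_max >> 1);
			defer_count++;
		}
		printf("  skb %d: deferred, kick=%d\n", skb, kick);
	}
}

int main(void)
{
	printf("defer_max=1, increment before kick test (as posted):\n");
	simulate(1, true);	/* kick never fires: 1 != (1 >> 1) */
	printf("defer_max=1, increment after kick test (suggested):\n");
	simulate(1, false);	/* kick fires on the first skb: 0 == (1 >> 1) */
	return 0;
}

With the increment first, defer_max == 1 compares a count of 1 against (1 >> 1) == 0 and the kick never fires; computing kick from the pre-increment count lets the very first deferred skb send the IPI, which is the reordering agreed on above.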
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index f86b5e1623c6922b070fd7c62e83271ee9aee46c..fa4dcdb283cf8937df8414906b57949cc7a3c2bc 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -322,6 +322,14 @@ a leaked reference faster. A larger value may be useful to prevent false
 warnings on slow/loaded systems.
 Default value is 10, minimum 1, maximum 3600.
 
+skb_defer_max
+-------------
+
+Max size (in skbs) of the per-cpu list of skbs being freed
+by the cpu which allocated them. Used by TCP stack so far.
+
+Default: 64
+
 optmem_max
 ----------
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 35b6d79b0c51412534dc3b3374b8d797d212f2d8..ac22fedfeaf72dc0d46f4793bbd9b2d5dd301730 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4329,6 +4329,7 @@ int netdev_max_backlog __read_mostly = 1000;
 EXPORT_SYMBOL(netdev_max_backlog);
 
 int netdev_tstamp_prequeue __read_mostly = 1;
+unsigned int sysctl_skb_defer_max __read_mostly = 64;
 int netdev_budget __read_mostly = 300;
 /* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
 unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
diff --git a/net/core/dev.h b/net/core/dev.h
index 328b37af90ba9465d83c833dffd18547ddef4028..cbb8a925175a257f8ce2a27eebb02e03041eebb8 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -39,7 +39,7 @@ void dev_addr_check(struct net_device *dev);
 /* sysctls not referred to from outside net/core/ */
 extern int netdev_budget;
 extern unsigned int netdev_budget_usecs;
-
+extern unsigned int sysctl_skb_defer_max;
 extern int netdev_tstamp_prequeue;
 extern int netdev_unregister_timeout_secs;
 extern int weight_p;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1e2180682f2e94c45e3f26059af6d18be2d9f9d3..ecb8fe7045a2f9c080cd0299ff7c0c1ea88d996b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -80,6 +80,7 @@
 #include <linux/user_namespace.h>
 #include <linux/indirect_call_wrapper.h>
 
+#include "dev.h"
 #include "sock_destructor.h"
 
 struct kmem_cache *skbuff_head_cache __ro_after_init;
@@ -6494,16 +6495,21 @@ void skb_attempt_defer_free(struct sk_buff *skb)
 	int cpu = skb->alloc_cpu;
 	struct softnet_data *sd;
 	unsigned long flags;
+	unsigned int defer_max;
 	bool kick;
 
 	if (WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
 	    !cpu_online(cpu) ||
 	    cpu == raw_smp_processor_id()) {
-		__kfree_skb(skb);
+nodefer:	__kfree_skb(skb);
 		return;
 	}
 
 	sd = &per_cpu(softnet_data, cpu);
+	defer_max = READ_ONCE(sysctl_skb_defer_max);
+	if (READ_ONCE(sd->defer_count) >= defer_max)
+		goto nodefer;
+
 	/* We do not send an IPI or any signal.
 	 * Remote cpu will eventually call skb_defer_free_flush()
 	 */
@@ -6513,11 +6519,8 @@ void skb_attempt_defer_free(struct sk_buff *skb)
 	WRITE_ONCE(sd->defer_list, skb);
 	sd->defer_count++;
 
-	/* kick every time queue length reaches 128.
-	 * This condition should hardly be hit under normal conditions,
-	 * unless cpu suddenly stopped to receive NIC interrupts.
-	 */
-	kick = sd->defer_count == 128;
+	/* Send an IPI every time queue reaches half capacity. */
+	kick = sd->defer_count == (defer_max >> 1);
 
 	spin_unlock_irqrestore(&sd->defer_lock, flags);
 
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 195ca5c2877125bf420128d3c6465ac216f459e5..ca8d38325e1e1d7775d61893fab94ff9499ef5f8 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -578,6 +578,14 @@ static struct ctl_table net_core_table[] = {
 		.extra1		= SYSCTL_ONE,
 		.extra2		= &int_3600,
 	},
+	{
+		.procname	= "skb_defer_max",
+		.data		= &sysctl_skb_defer_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
 	{ }
 };
 
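Since net_core_table registers its entries under net.core, the new knob should show up as net.core.skb_defer_max once this lands. Because extra1 is SYSCTL_ZERO, writing 0 (e.g. sysctl -w net.core.skb_defer_max=0) makes every skb fail the defer_count >= defer_max test and take the nodefer path, the "disable the thing completely" option Eric mentions above, while the default of 64 defers up to 64 skbs per cpu and kicks the remote cpu once the queue reaches half capacity (32).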