Message ID | 20230823050609.2228718-2-mjguzik@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | execve scalability issues, part 1 | expand |
On Wed, Aug 23, 2023 at 07:06:08AM +0200, Mateusz Guzik wrote: > Allocations and frees are globally serialized on the pcpu lock (and the > CPU hotplug lock if enabled, which is the case on Debian). > > At least one frequent consumer allocates 4 back-to-back counters (and > frees them in the same manner), exacerbating the problem. > > While this does not fully remedy scalability issues, it is a step > towards that goal and provides immediate relief. > > Signed-off-by: Mateusz Guzik <mjguzik@gmail.com> I'm happy with this. There are a few minor reflow of lines that I'd like to do but other than that nice. If there are no other comments and it's okay with Andrew I'll pick this up tomorrow for-6.6 and the corresponding changes to fork.c. Reviewed-by: Dennis Zhou <dennis@kernel.org> Thanks, Dennis > --- > include/linux/percpu_counter.h | 39 ++++++++++++++++++---- > lib/percpu_counter.c | 61 +++++++++++++++++++++++----------- > 2 files changed, 74 insertions(+), 26 deletions(-) > > diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h > index 75b73c83bc9d..f1e7c987e3d3 100644 > --- a/include/linux/percpu_counter.h > +++ b/include/linux/percpu_counter.h > @@ -30,17 +30,27 @@ struct percpu_counter { > > extern int percpu_counter_batch; > > -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, > - struct lock_class_key *key); > +int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp, > + u32 nr_counters, struct lock_class_key *key); > > -#define percpu_counter_init(fbc, value, gfp) \ > +#define percpu_counter_init_many(fbc, value, gfp, nr_counters) \ > ({ \ > static struct lock_class_key __key; \ > \ > - __percpu_counter_init(fbc, value, gfp, &__key); \ > + __percpu_counter_init_many(fbc, value, gfp, nr_counters,\ > + &__key); \ > }) > > -void percpu_counter_destroy(struct percpu_counter *fbc); > + > +#define percpu_counter_init(fbc, value, gfp) \ > + percpu_counter_init_many(fbc, value, gfp, 1) > + > 
+void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters); > +static inline void percpu_counter_destroy(struct percpu_counter *fbc) > +{ > + percpu_counter_destroy_many(fbc, 1); > +} > + > void percpu_counter_set(struct percpu_counter *fbc, s64 amount); > void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, > s32 batch); > @@ -116,11 +126,26 @@ struct percpu_counter { > s64 count; > }; > > +static inline int percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, > + gfp_t gfp, u32 nr_counters) > +{ > + u32 i; > + > + for (i = 0; i < nr_counters; i++) > + fbc[i].count = amount; > + > + return 0; > +} > + > static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount, > gfp_t gfp) > { > - fbc->count = amount; > - return 0; > + return percpu_counter_init_many(fbc, amount, gfp, 1); > +} > + > +static inline void percpu_counter_destroy_many(struct percpu_counter *fbc, > + u32 nr_counters) > +{ > } > > static inline void percpu_counter_destroy(struct percpu_counter *fbc) > diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c > index 5004463c4f9f..9338b27f1cdd 100644 > --- a/lib/percpu_counter.c > +++ b/lib/percpu_counter.c > @@ -151,48 +151,71 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) > } > EXPORT_SYMBOL(__percpu_counter_sum); > > -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, > - struct lock_class_key *key) > +int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp, > + u32 nr_counters, struct lock_class_key *key) > { > unsigned long flags __maybe_unused; > - > - raw_spin_lock_init(&fbc->lock); > - lockdep_set_class(&fbc->lock, key); > - fbc->count = amount; > - fbc->counters = alloc_percpu_gfp(s32, gfp); > - if (!fbc->counters) > + size_t counter_size; > + s32 __percpu *counters; > + u32 i; > + > + counter_size = ALIGN(sizeof(*counters), __alignof__(*counters)); > + counters = __alloc_percpu_gfp(nr_counters * counter_size, > + 
__alignof__(*counters), gfp); > + if (!counters) { > + fbc[0].counters = NULL; > return -ENOMEM; > + } > > - debug_percpu_counter_activate(fbc); > + for (i = 0; i < nr_counters; i++) { > + raw_spin_lock_init(&fbc[i].lock); > + lockdep_set_class(&fbc[i].lock, key); > +#ifdef CONFIG_HOTPLUG_CPU > + INIT_LIST_HEAD(&fbc[i].list); > +#endif > + fbc[i].count = amount; > + fbc[i].counters = (void *)counters + (i * counter_size); > + > + debug_percpu_counter_activate(&fbc[i]); > + } > > #ifdef CONFIG_HOTPLUG_CPU > - INIT_LIST_HEAD(&fbc->list); > spin_lock_irqsave(&percpu_counters_lock, flags); > - list_add(&fbc->list, &percpu_counters); > + for (i = 0; i < nr_counters; i++) > + list_add(&fbc[i].list, &percpu_counters); > spin_unlock_irqrestore(&percpu_counters_lock, flags); > #endif > return 0; > } > -EXPORT_SYMBOL(__percpu_counter_init); > +EXPORT_SYMBOL(__percpu_counter_init_many); > > -void percpu_counter_destroy(struct percpu_counter *fbc) > +void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters) > { > unsigned long flags __maybe_unused; > + u32 i; > + > + if (WARN_ON_ONCE(!fbc)) > + return; > > - if (!fbc->counters) > + if (!fbc[0].counters) > return; > > - debug_percpu_counter_deactivate(fbc); > + for (i = 0; i < nr_counters; i++) > + debug_percpu_counter_deactivate(&fbc[i]); > > #ifdef CONFIG_HOTPLUG_CPU > spin_lock_irqsave(&percpu_counters_lock, flags); > - list_del(&fbc->list); > + for (i = 0; i < nr_counters; i++) > + list_del(&fbc[i].list); > spin_unlock_irqrestore(&percpu_counters_lock, flags); > #endif > - free_percpu(fbc->counters); > - fbc->counters = NULL; > + > + free_percpu(fbc[0].counters); > + > + for (i = 0; i < nr_counters; i++) > + fbc[i].counters = NULL; > } > -EXPORT_SYMBOL(percpu_counter_destroy); > +EXPORT_SYMBOL(percpu_counter_destroy_many); > > int percpu_counter_batch __read_mostly = 32; > EXPORT_SYMBOL(percpu_counter_batch); > -- > 2.41.0 >
On 8/24/23 08:26, Dennis Zhou wrote:
> On Wed, Aug 23, 2023 at 07:06:08AM +0200, Mateusz Guzik wrote:
>> Allocations and frees are globally serialized on the pcpu lock (and the
>> CPU hotplug lock if enabled, which is the case on Debian).
>>
>> At least one frequent consumer allocates 4 back-to-back counters (and
>> frees them in the same manner), exacerbating the problem.
>>
>> While this does not fully remedy scalability issues, it is a step
>> towards that goal and provides immediate relief.
>>
>> Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
>
> I'm happy with this. There are a few minor reflow of lines that I'd like
> to do but other than that nice.
>
> If there are no other comments and it's okay with Andrew I'll pick this
> up tomorrow for-6.6 and the corresponding changes to fork.c.
>
> Reviewed-by: Dennis Zhou <dennis@kernel.org>

FWIW, the new version also looks correct to me.

Reviewed-by: Vegard Nossum <vegard.nossum@oracle.com>

Vegard
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 75b73c83bc9d..f1e7c987e3d3 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -30,17 +30,27 @@ struct percpu_counter { extern int percpu_counter_batch; -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, - struct lock_class_key *key); +int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp, + u32 nr_counters, struct lock_class_key *key); -#define percpu_counter_init(fbc, value, gfp) \ +#define percpu_counter_init_many(fbc, value, gfp, nr_counters) \ ({ \ static struct lock_class_key __key; \ \ - __percpu_counter_init(fbc, value, gfp, &__key); \ + __percpu_counter_init_many(fbc, value, gfp, nr_counters,\ + &__key); \ }) -void percpu_counter_destroy(struct percpu_counter *fbc); + +#define percpu_counter_init(fbc, value, gfp) \ + percpu_counter_init_many(fbc, value, gfp, 1) + +void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters); +static inline void percpu_counter_destroy(struct percpu_counter *fbc) +{ + percpu_counter_destroy_many(fbc, 1); +} + void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); @@ -116,11 +126,26 @@ struct percpu_counter { s64 count; }; +static inline int percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, + gfp_t gfp, u32 nr_counters) +{ + u32 i; + + for (i = 0; i < nr_counters; i++) + fbc[i].count = amount; + + return 0; +} + static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp) { - fbc->count = amount; - return 0; + return percpu_counter_init_many(fbc, amount, gfp, 1); +} + +static inline void percpu_counter_destroy_many(struct percpu_counter *fbc, + u32 nr_counters) +{ } static inline void percpu_counter_destroy(struct percpu_counter *fbc) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 
5004463c4f9f..9338b27f1cdd 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -151,48 +151,71 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) } EXPORT_SYMBOL(__percpu_counter_sum); -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, - struct lock_class_key *key) +int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp, + u32 nr_counters, struct lock_class_key *key) { unsigned long flags __maybe_unused; - - raw_spin_lock_init(&fbc->lock); - lockdep_set_class(&fbc->lock, key); - fbc->count = amount; - fbc->counters = alloc_percpu_gfp(s32, gfp); - if (!fbc->counters) + size_t counter_size; + s32 __percpu *counters; + u32 i; + + counter_size = ALIGN(sizeof(*counters), __alignof__(*counters)); + counters = __alloc_percpu_gfp(nr_counters * counter_size, + __alignof__(*counters), gfp); + if (!counters) { + fbc[0].counters = NULL; return -ENOMEM; + } - debug_percpu_counter_activate(fbc); + for (i = 0; i < nr_counters; i++) { + raw_spin_lock_init(&fbc[i].lock); + lockdep_set_class(&fbc[i].lock, key); +#ifdef CONFIG_HOTPLUG_CPU + INIT_LIST_HEAD(&fbc[i].list); +#endif + fbc[i].count = amount; + fbc[i].counters = (void *)counters + (i * counter_size); + + debug_percpu_counter_activate(&fbc[i]); + } #ifdef CONFIG_HOTPLUG_CPU - INIT_LIST_HEAD(&fbc->list); spin_lock_irqsave(&percpu_counters_lock, flags); - list_add(&fbc->list, &percpu_counters); + for (i = 0; i < nr_counters; i++) + list_add(&fbc[i].list, &percpu_counters); spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif return 0; } -EXPORT_SYMBOL(__percpu_counter_init); +EXPORT_SYMBOL(__percpu_counter_init_many); -void percpu_counter_destroy(struct percpu_counter *fbc) +void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters) { unsigned long flags __maybe_unused; + u32 i; + + if (WARN_ON_ONCE(!fbc)) + return; - if (!fbc->counters) + if (!fbc[0].counters) return; - debug_percpu_counter_deactivate(fbc); + for (i = 0; i < 
nr_counters; i++) + debug_percpu_counter_deactivate(&fbc[i]); #ifdef CONFIG_HOTPLUG_CPU spin_lock_irqsave(&percpu_counters_lock, flags); - list_del(&fbc->list); + for (i = 0; i < nr_counters; i++) + list_del(&fbc[i].list); spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif - free_percpu(fbc->counters); - fbc->counters = NULL; + + free_percpu(fbc[0].counters); + + for (i = 0; i < nr_counters; i++) + fbc[i].counters = NULL; } -EXPORT_SYMBOL(percpu_counter_destroy); +EXPORT_SYMBOL(percpu_counter_destroy_many); int percpu_counter_batch __read_mostly = 32; EXPORT_SYMBOL(percpu_counter_batch);
Allocations and frees are globally serialized on the pcpu lock (and the
CPU hotplug lock if enabled, which is the case on Debian).

At least one frequent consumer allocates 4 back-to-back counters (and
frees them in the same manner), exacerbating the problem.

While this does not fully remedy scalability issues, it is a step
towards that goal and provides immediate relief.

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---
 include/linux/percpu_counter.h | 39 ++++++++++++++++++----
 lib/percpu_counter.c           | 61 +++++++++++++++++++++++-----------
 2 files changed, 74 insertions(+), 26 deletions(-)