Message ID | 20201030013820.29758-1-vvghjk1234@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | percpu: Reduce the number of cpu distance comparisons | expand |
Hello, On Fri, Oct 30, 2020 at 10:38:20AM +0900, Wonhyuk Yang wrote: > From: Wonhyuk Yang <vvghjk1234@gmail.com> > > To build group_map[] and group_cnt[], we find out which group > CPUs belong to by comparing the distance of the cpu. However, > this includes cases where comparisons are not required. > > This patch uses a bitmap to record CPUs that are not yet classified > into a group. CPUs whose group is already known should be > cleared from the bitmap. As a result, we can reduce the number of > unnecessary comparisons. > > Signed-off-by: Wonhyuk Yang <vvghjk1234@gmail.com> > --- > mm/percpu.c | 32 ++++++++++++++++++-------------- > 1 file changed, 18 insertions(+), 14 deletions(-) > > diff --git a/mm/percpu.c b/mm/percpu.c > index 66a93f096394..d19ca484eee4 100644 > --- a/mm/percpu.c > +++ b/mm/percpu.c > @@ -2669,6 +2669,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( > { > static int group_map[NR_CPUS] __initdata; > static int group_cnt[NR_CPUS] __initdata; > + static struct cpumask mask __initdata; > const size_t static_size = __per_cpu_end - __per_cpu_start; > int nr_groups = 1, nr_units = 0; > size_t size_sum, min_unit_size, alloc_size; > @@ -2702,24 +2703,27 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( > upa--; > max_upa = upa; > > + cpumask_copy(&mask, cpu_possible_mask); > + > /* group cpus according to their proximity */ > - for_each_possible_cpu(cpu) { > - group = 0; > - next_group: > - for_each_possible_cpu(tcpu) { > - if (cpu == tcpu) > - break; > - if (group_map[tcpu] == group && cpu_distance_fn && > - (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || > - cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { > - group++; > - nr_groups = max(nr_groups, group + 1); > - goto next_group; > - } > - } > + for (group = 0; !cpumask_empty(&mask); group++) { > + /* pop the group's first cpu */ > + cpu = cpumask_first(&mask); > group_map[cpu] = group; > group_cnt[group]++; > + cpumask_clear_cpu(cpu, &mask); > + > + 
for_each_cpu(tcpu, &mask) { > + if (!cpu_distance_fn || > + (cpu_distance_fn(cpu, tcpu) == LOCAL_DISTANCE && > + cpu_distance_fn(tcpu, cpu) == LOCAL_DISTANCE)) { > + group_map[tcpu] = group; > + group_cnt[group]++; > + cpumask_clear_cpu(tcpu, &mask); > + } > + } > } > + nr_groups = group; > > /* > * Wasted space is caused by a ratio imbalance of upa to group_cnt. > -- > 2.17.1 > Sorry for the delay. It's been a little bit of a busy week for me and it always takes me a moment to wrap my head around this code. I've applied this to percpu#for-5.11. Thanks, Dennis
diff --git a/mm/percpu.c b/mm/percpu.c index 66a93f096394..d19ca484eee4 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -2669,6 +2669,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( { static int group_map[NR_CPUS] __initdata; static int group_cnt[NR_CPUS] __initdata; + static struct cpumask mask __initdata; const size_t static_size = __per_cpu_end - __per_cpu_start; int nr_groups = 1, nr_units = 0; size_t size_sum, min_unit_size, alloc_size; @@ -2702,24 +2703,27 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( upa--; max_upa = upa; + cpumask_copy(&mask, cpu_possible_mask); + /* group cpus according to their proximity */ - for_each_possible_cpu(cpu) { - group = 0; - next_group: - for_each_possible_cpu(tcpu) { - if (cpu == tcpu) - break; - if (group_map[tcpu] == group && cpu_distance_fn && - (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || - cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { - group++; - nr_groups = max(nr_groups, group + 1); - goto next_group; - } - } + for (group = 0; !cpumask_empty(&mask); group++) { + /* pop the group's first cpu */ + cpu = cpumask_first(&mask); group_map[cpu] = group; group_cnt[group]++; + cpumask_clear_cpu(cpu, &mask); + + for_each_cpu(tcpu, &mask) { + if (!cpu_distance_fn || + (cpu_distance_fn(cpu, tcpu) == LOCAL_DISTANCE && + cpu_distance_fn(tcpu, cpu) == LOCAL_DISTANCE)) { + group_map[tcpu] = group; + group_cnt[group]++; + cpumask_clear_cpu(tcpu, &mask); + } + } } + nr_groups = group; /* * Wasted space is caused by a ratio imbalance of upa to group_cnt.