diff mbox series

[15/16] sched/topology: optimize topology_span_sane()

Message ID 20220718192844.1805158-16-yury.norov@gmail.com (mailing list archive)
State Not Applicable
Delegated to: Netdev Maintainers
Headers show
Series Introduce DEBUG_BITMAP config option and bitmap_check_params() | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Yury Norov July 18, 2022, 7:28 p.m. UTC
topology_span_sane() checks if cpu == i before calling
	cpumask_equal(tl->mask(cpu), tl->mask(i)).

However, tl->mask(cpu) and tl->mask(i) may point to the same cpumask
even if i != cpu. Fix the check accordingly.

While here, move tl->mask(cpu) out of the loop, and make the in-loop
code calculate tl->mask(i) only once.

Caught with CONFIG_DEBUG_BITMAP:
[    0.867917] Call Trace:
[    0.868209]  <TASK>
[    0.868471]  build_sched_domains+0x36f/0x1a40
[    0.868576]  sched_init_smp+0x44/0xba
[    0.869012]  ? mtrr_aps_init+0x84/0xa0
[    0.869465]  kernel_init_freeable+0x12e/0x26e
[    0.869982]  ? rest_init+0xd0/0xd0
[    0.870406]  kernel_init+0x16/0x120
[    0.870821]  ret_from_fork+0x22/0x30
[    0.871244]  </TASK>
[    0.871502] ---[ end trace 0000000000000000 ]---
[    0.872040] b1:              ffffffffb1fd3480
[    0.872041] b2:              ffffffffb1fd3480
[    0.872041] b3:              0
[    0.872042] nbits:   256
[    0.872042] start:   0
[    0.872042] off:     0
[    0.872043] Bitmap: parameters check failed
[    0.872043] include/linux/bitmap.h [427]: bitmap_equal

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 kernel/sched/topology.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

Comments

Peter Zijlstra July 18, 2022, 9:37 p.m. UTC | #1
On Mon, Jul 18, 2022 at 12:28:43PM -0700, Yury Norov wrote:

>  kernel/sched/topology.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index 05b6c2ad90b9..ad32d0a43424 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -2211,6 +2211,8 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
>  static bool topology_span_sane(struct sched_domain_topology_level *tl,
>  			      const struct cpumask *cpu_map, int cpu)
>  {
> +	const struct cpumask *mc = tl->mask(cpu);
> +	const struct cpumask *mi;
>  	int i;
>  
>  	/* NUMA levels are allowed to overlap */
> @@ -2226,14 +2228,18 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl,
>  	for_each_cpu(i, cpu_map) {
>  		if (i == cpu)
>  			continue;
> +
> +		mi = tl->mask(i);
> +		if (mi == mc)
> +			continue;
> +
>  		/*
>  		 * We should 'and' all those masks with 'cpu_map' to exactly
>  		 * match the topology we're about to build, but that can only
>  		 * remove CPUs, which only lessens our ability to detect
>  		 * overlaps
>  		 */
> -		if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) &&
> -		    cpumask_intersects(tl->mask(cpu), tl->mask(i)))
> +		if (!cpumask_equal(mc, mi) && cpumask_intersects(mc, mi))
>  			return false;
>  	}

This is once again a super slow path; but I don't suppose you're making
the code worse in this case.
diff mbox series

Patch

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 05b6c2ad90b9..ad32d0a43424 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2211,6 +2211,8 @@  static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
 static bool topology_span_sane(struct sched_domain_topology_level *tl,
 			      const struct cpumask *cpu_map, int cpu)
 {
+	const struct cpumask *mc = tl->mask(cpu);
+	const struct cpumask *mi;
 	int i;
 
 	/* NUMA levels are allowed to overlap */
@@ -2226,14 +2228,18 @@  static bool topology_span_sane(struct sched_domain_topology_level *tl,
 	for_each_cpu(i, cpu_map) {
 		if (i == cpu)
 			continue;
+
+		mi = tl->mask(i);
+		if (mi == mc)
+			continue;
+
 		/*
 		 * We should 'and' all those masks with 'cpu_map' to exactly
 		 * match the topology we're about to build, but that can only
 		 * remove CPUs, which only lessens our ability to detect
 		 * overlaps
 		 */
-		if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) &&
-		    cpumask_intersects(tl->mask(cpu), tl->mask(i)))
+		if (!cpumask_equal(mc, mi) && cpumask_intersects(mc, mi))
 			return false;
 	}