diff mbox series

[07/16] smp: optimize smp_call_function_many_cond()

Message ID 20220718192844.1805158-8-yury.norov@gmail.com (mailing list archive)
State New
Headers show
Series Introduce DEBUG_BITMAP config option and bitmap_check_params() | expand

Commit Message

Yury Norov July 18, 2022, 7:28 p.m. UTC
smp_call_function_many_cond() is often called with cpu_online_mask.
If this is the case, we can use num_online_cpus(), which is O(1),
instead of cpumask_{first,next}(), which is O(N).

It can be optimized further: if cpu_online_mask has 0 or a single bit
set (depending on cpu_online(this_cpu)), we can return the result
without AND'ing with the user's mask.

Caught with CONFIG_DEBUG_BITMAP:
[    7.830337] Call trace:
[    7.830397]  __bitmap_check_params+0x1d8/0x260
[    7.830499]  smp_call_function_many_cond+0x1e8/0x45c
[    7.830607]  kick_all_cpus_sync+0x44/0x80
[    7.830698]  bpf_int_jit_compile+0x34c/0x5cc
[    7.830796]  bpf_prog_select_runtime+0x118/0x190
[    7.830900]  bpf_prepare_filter+0x3dc/0x51c
[    7.830995]  __get_filter+0xd4/0x170
[    7.831145]  sk_attach_filter+0x18/0xb0
[    7.831236]  sock_setsockopt+0x5b0/0x1214
[    7.831330]  __sys_setsockopt+0x144/0x170
[    7.831431]  __arm64_sys_setsockopt+0x2c/0x40
[    7.831541]  invoke_syscall+0x48/0x114
[    7.831634]  el0_svc_common.constprop.0+0x44/0xfc
[    7.831745]  do_el0_svc+0x30/0xc0
[    7.831825]  el0_svc+0x2c/0x84
[    7.831899]  el0t_64_sync_handler+0xbc/0x140
[    7.831999]  el0t_64_sync+0x18c/0x190
[    7.832086] ---[ end trace 0000000000000000 ]---
[    7.832375] b1:		ffff24d1ffd98a48
[    7.832385] b2:		ffffa65533a29a38
[    7.832393] b3:		ffffa65533a29a38
[    7.832400] nbits:	256
[    7.832407] start:	0
[    7.832412] off:	0
[    7.832418] smp: Bitmap: parameters check failed
[    7.832432] smp: include/linux/bitmap.h [363]: bitmap_and

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 kernel/smp.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

Comments

Peter Zijlstra July 18, 2022, 9:26 p.m. UTC | #1
On Mon, Jul 18, 2022 at 12:28:35PM -0700, Yury Norov wrote:

> diff --git a/kernel/smp.c b/kernel/smp.c
> index dd215f439426..7ed2b9b12f74 100644
> --- a/kernel/smp.c
> +++ b/kernel/smp.c
> @@ -880,6 +880,28 @@ EXPORT_SYMBOL_GPL(smp_call_function_any);
>  #define SCF_WAIT	(1U << 0)
>  #define SCF_RUN_LOCAL	(1U << 1)
>  
> +/* Check if we need remote execution, i.e., any CPU excluding this one. */
> +static inline bool __need_remote_exec(const struct cpumask *mask, unsigned int this_cpu)
> +{
> +	unsigned int cpu;
> +
> +	switch (num_online_cpus()) {
> +	case 0:
> +		return false;
> +	case 1:
> +		return cpu_online(this_cpu) ? false : true;
> +	default:
> +		if (mask == cpu_online_mask)
> +			return true;
> +	}
> +
> +	cpu = cpumask_first_and(mask, cpu_online_mask);
> +	if (cpu == this_cpu)
> +		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
> +
> +	return cpu < nr_cpu_ids;
> +}
> +
>  static void smp_call_function_many_cond(const struct cpumask *mask,
>  					smp_call_func_t func, void *info,
>  					unsigned int scf_flags,
> @@ -916,12 +938,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
>  	if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask))
>  		run_local = true;
>  
> -	/* Check if we need remote execution, i.e., any CPU excluding this one. */
> -	cpu = cpumask_first_and(mask, cpu_online_mask);
> -	if (cpu == this_cpu)
> -		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
> -	if (cpu < nr_cpu_ids)
> -		run_remote = true;
> +	run_remote = __need_remote_exec(mask, this_cpu);
>  
>  	if (run_remote) {
>  		cfd = this_cpu_ptr(&cfd_data);

This is more complex code for very little to no gain. Why?!
Andy Shevchenko July 18, 2022, 9:36 p.m. UTC | #2
On Mon, Jul 18, 2022 at 12:28:35PM -0700, Yury Norov wrote:
> smp_call_function_many_cond() is often passed with cpu_online_mask.
> If this is the case, we can use num_online_cpus(), which is O(1)
> instead of cpumask_{first,next}(), which is O(N).
> 
> It can be optimized further: if cpu_online_mask has 0 or single bit
> set (depending on cpu_online(this_cpu), we can return result without
> AND'ing with user's mask.

> Caught with CONFIG_DEBUG_BITMAP:
> [    7.830337] Call trace:
> [    7.830397]  __bitmap_check_params+0x1d8/0x260
> [    7.830499]  smp_call_function_many_cond+0x1e8/0x45c
> [    7.830607]  kick_all_cpus_sync+0x44/0x80
> [    7.830698]  bpf_int_jit_compile+0x34c/0x5cc
> [    7.830796]  bpf_prog_select_runtime+0x118/0x190
> [    7.830900]  bpf_prepare_filter+0x3dc/0x51c
> [    7.830995]  __get_filter+0xd4/0x170
> [    7.831145]  sk_attach_filter+0x18/0xb0
> [    7.831236]  sock_setsockopt+0x5b0/0x1214
> [    7.831330]  __sys_setsockopt+0x144/0x170
> [    7.831431]  __arm64_sys_setsockopt+0x2c/0x40
> [    7.831541]  invoke_syscall+0x48/0x114
> [    7.831634]  el0_svc_common.constprop.0+0x44/0xfc
> [    7.831745]  do_el0_svc+0x30/0xc0
> [    7.831825]  el0_svc+0x2c/0x84
> [    7.831899]  el0t_64_sync_handler+0xbc/0x140
> [    7.831999]  el0t_64_sync+0x18c/0x190
> [    7.832086] ---[ end trace 0000000000000000 ]---
> [    7.832375] b1:		ffff24d1ffd98a48
> [    7.832385] b2:		ffffa65533a29a38
> [    7.832393] b3:		ffffa65533a29a38
> [    7.832400] nbits:	256
> [    7.832407] start:	0
> [    7.832412] off:	0
> [    7.832418] smp: Bitmap: parameters check failed
> [    7.832432] smp: include/linux/bitmap.h [363]: bitmap_and

Documentation specifically says:

https://www.kernel.org/doc/html/latest/process/submitting-patches.html#backtraces-in-commit-mesages

...

> +	default:
> +		if (mask == cpu_online_mask)
> +			return true;

Instead, put the (missing) break; here and handle the "default" case together with the code below.

> +	cpu = cpumask_first_and(mask, cpu_online_mask);
> +	if (cpu == this_cpu)
> +		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
> +
> +	return cpu < nr_cpu_ids;

...

> +	run_remote = __need_remote_exec(mask, this_cpu);

>  

Now you may remove this blank line.

>  	if (run_remote) {
diff mbox series

Patch

diff --git a/kernel/smp.c b/kernel/smp.c
index dd215f439426..7ed2b9b12f74 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -880,6 +880,28 @@  EXPORT_SYMBOL_GPL(smp_call_function_any);
 #define SCF_WAIT	(1U << 0)
 #define SCF_RUN_LOCAL	(1U << 1)
 
+/* Check if we need remote execution, i.e., any CPU excluding this one. */
+static inline bool __need_remote_exec(const struct cpumask *mask, unsigned int this_cpu)
+{
+	unsigned int cpu;
+
+	switch (num_online_cpus()) {
+	case 0:
+		return false;
+	case 1:
+		return cpu_online(this_cpu) ? false : true;
+	default:
+		if (mask == cpu_online_mask)
+			return true;
+	}
+
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	if (cpu == this_cpu)
+		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
+
+	return cpu < nr_cpu_ids;
+}
+
 static void smp_call_function_many_cond(const struct cpumask *mask,
 					smp_call_func_t func, void *info,
 					unsigned int scf_flags,
@@ -916,12 +938,7 @@  static void smp_call_function_many_cond(const struct cpumask *mask,
 	if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask))
 		run_local = true;
 
-	/* Check if we need remote execution, i.e., any CPU excluding this one. */
-	cpu = cpumask_first_and(mask, cpu_online_mask);
-	if (cpu == this_cpu)
-		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
-	if (cpu < nr_cpu_ids)
-		run_remote = true;
+	run_remote = __need_remote_exec(mask, this_cpu);
 
 	if (run_remote) {
 		cfd = this_cpu_ptr(&cfd_data);