Message ID | 20230116101009.23694-4-jolsa@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | kallsyms: Optimize the search for module symbols by livepatch and bpf | expand |
On 2023/1/16 18:10, Jiri Olsa wrote: > We currently use module_kallsyms_on_each_symbol that iterates all > modules/symbols and we try to lookup each such address in user > provided symbols/addresses to get list of used modules. > > This fix instead only iterates provided kprobe addresses and calls > __module_address on each to get list of used modules. This turned > out ot be simpler and also bit faster. ot --> to Reviewed-by: Zhen Lei <thunder.leizhen@huawei.com> > > On my setup with workload (executed 10 times): > > # test_progs -t kprobe_multi_bench_attach/modules > > Current code: > > Performance counter stats for './test.sh' (5 runs): > > 76,081,161,596 cycles:k ( +- 0.47% ) > > 18.3867 +- 0.0992 seconds time elapsed ( +- 0.54% ) > > With the fix: > > Performance counter stats for './test.sh' (5 runs): > > 74,079,889,063 cycles:k ( +- 0.04% ) > > 17.8514 +- 0.0218 seconds time elapsed ( +- 0.12% ) > > Signed-off-by: Jiri Olsa <jolsa@kernel.org> > --- > kernel/trace/bpf_trace.c | 93 ++++++++++++++++++++-------------------- > 1 file changed, 47 insertions(+), 46 deletions(-) > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index 095f7f8d34a1..8124f1ad0d4a 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -2682,69 +2682,77 @@ static void symbols_swap_r(void *a, void *b, int size, const void *priv) > } > } > > -struct module_addr_args { > - unsigned long *addrs; > - u32 addrs_cnt; > +struct modules_array { > struct module **mods; > int mods_cnt; > int mods_cap; > }; > > -static int module_callback(void *data, const char *name, > - struct module *mod, unsigned long addr) > +static int add_module(struct modules_array *arr, struct module *mod) > { > - struct module_addr_args *args = data; > struct module **mods; > > - /* We iterate all modules symbols and for each we: > - * - search for it in provided addresses array > - * - if found we check if we already have the module pointer stored > - * (we iterate modules 
sequentially, so we can check just the last > - * module pointer) > - * - take module reference and store it > - */ > - if (!bsearch(&addr, args->addrs, args->addrs_cnt, sizeof(addr), > - bpf_kprobe_multi_addrs_cmp)) > - return 0; > - > - if (args->mods && args->mods[args->mods_cnt - 1] == mod) > - return 0; > - > - if (args->mods_cnt == args->mods_cap) { > - args->mods_cap = max(16, args->mods_cap * 3 / 2); > - mods = krealloc_array(args->mods, args->mods_cap, sizeof(*mods), GFP_KERNEL); > + if (arr->mods_cnt == arr->mods_cap) { > + arr->mods_cap = max(16, arr->mods_cap * 3 / 2); > + mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL); > if (!mods) > return -ENOMEM; > - args->mods = mods; > + arr->mods = mods; > } > > - if (!try_module_get(mod)) > - return -EINVAL; > - > - args->mods[args->mods_cnt] = mod; > - args->mods_cnt++; > + arr->mods[arr->mods_cnt] = mod; > + arr->mods_cnt++; > return 0; > } > > +static bool has_module(struct modules_array *arr, struct module *mod) > +{ > + int i; > + > + for (i = arr->mods_cnt - 1; i >= 0; i--) { > + if (arr->mods[i] == mod) > + return true; > + } > + return false; > +} > + > static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt) > { > - struct module_addr_args args = { > - .addrs = addrs, > - .addrs_cnt = addrs_cnt, > - }; > - int err; > + struct modules_array arr = {}; > + u32 i, err = 0; > + > + for (i = 0; i < addrs_cnt; i++) { > + struct module *mod; > + > + preempt_disable(); > + mod = __module_address(addrs[i]); > + /* Either no module or we it's already stored */ > + if (!mod || has_module(&arr, mod)) { > + preempt_enable(); > + continue; > + } > + if (!try_module_get(mod)) > + err = -EINVAL; > + preempt_enable(); > + if (err) > + break; > + err = add_module(&arr, mod); > + if (err) { > + module_put(mod); > + break; > + } > + } > > /* We return either err < 0 in case of error, ... 
*/ > - err = module_kallsyms_on_each_symbol(NULL, module_callback, &args); > if (err) { > - kprobe_multi_put_modules(args.mods, args.mods_cnt); > - kfree(args.mods); > + kprobe_multi_put_modules(arr.mods, arr.mods_cnt); > + kfree(arr.mods); > return err; > } > > /* or number of modules found if everything is ok. */ > - *mods = args.mods; > - return args.mods_cnt; > + *mods = arr.mods; > + return arr.mods_cnt; > } > > int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) > @@ -2857,13 +2865,6 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr > bpf_kprobe_multi_cookie_cmp, > bpf_kprobe_multi_cookie_swap, > link); > - } else { > - /* > - * We need to sort addrs array even if there are no cookies > - * provided, to allow bsearch in get_modules_for_addrs. > - */ > - sort(addrs, cnt, sizeof(*addrs), > - bpf_kprobe_multi_addrs_cmp, NULL); > } > > err = get_modules_for_addrs(&link->mods, addrs, cnt); >
On Mon 2023-01-16 11:10:09, Jiri Olsa wrote: > We currently use module_kallsyms_on_each_symbol that iterates all > modules/symbols and we try to lookup each such address in user > provided symbols/addresses to get list of used modules. > > This fix instead only iterates provided kprobe addresses and calls > __module_address on each to get list of used modules. This turned > out ot be simpler and also bit faster. > > On my setup with workload (executed 10 times): > > # test_progs -t kprobe_multi_bench_attach/modules > > Current code: > > Performance counter stats for './test.sh' (5 runs): > > 76,081,161,596 cycles:k ( +- 0.47% ) > > 18.3867 +- 0.0992 seconds time elapsed ( +- 0.54% ) > > With the fix: > > Performance counter stats for './test.sh' (5 runs): > > 74,079,889,063 cycles:k ( +- 0.04% ) > > 17.8514 +- 0.0218 seconds time elapsed ( +- 0.12% ) > > Signed-off-by: Jiri Olsa <jolsa@kernel.org> The change looks good to me: Reviewed-by: Petr Mladek <pmladek@suse.com> Best Regards, Petr
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 095f7f8d34a1..8124f1ad0d4a 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2682,69 +2682,77 @@ static void symbols_swap_r(void *a, void *b, int size, const void *priv) } } -struct module_addr_args { - unsigned long *addrs; - u32 addrs_cnt; +struct modules_array { struct module **mods; int mods_cnt; int mods_cap; }; -static int module_callback(void *data, const char *name, - struct module *mod, unsigned long addr) +static int add_module(struct modules_array *arr, struct module *mod) { - struct module_addr_args *args = data; struct module **mods; - /* We iterate all modules symbols and for each we: - * - search for it in provided addresses array - * - if found we check if we already have the module pointer stored - * (we iterate modules sequentially, so we can check just the last - * module pointer) - * - take module reference and store it - */ - if (!bsearch(&addr, args->addrs, args->addrs_cnt, sizeof(addr), - bpf_kprobe_multi_addrs_cmp)) - return 0; - - if (args->mods && args->mods[args->mods_cnt - 1] == mod) - return 0; - - if (args->mods_cnt == args->mods_cap) { - args->mods_cap = max(16, args->mods_cap * 3 / 2); - mods = krealloc_array(args->mods, args->mods_cap, sizeof(*mods), GFP_KERNEL); + if (arr->mods_cnt == arr->mods_cap) { + arr->mods_cap = max(16, arr->mods_cap * 3 / 2); + mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL); if (!mods) return -ENOMEM; - args->mods = mods; + arr->mods = mods; } - if (!try_module_get(mod)) - return -EINVAL; - - args->mods[args->mods_cnt] = mod; - args->mods_cnt++; + arr->mods[arr->mods_cnt] = mod; + arr->mods_cnt++; return 0; } +static bool has_module(struct modules_array *arr, struct module *mod) +{ + int i; + + for (i = arr->mods_cnt - 1; i >= 0; i--) { + if (arr->mods[i] == mod) + return true; + } + return false; +} + static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 
addrs_cnt) { - struct module_addr_args args = { - .addrs = addrs, - .addrs_cnt = addrs_cnt, - }; - int err; + struct modules_array arr = {}; + u32 i, err = 0; + + for (i = 0; i < addrs_cnt; i++) { + struct module *mod; + + preempt_disable(); + mod = __module_address(addrs[i]); + /* Either no module or we it's already stored */ + if (!mod || has_module(&arr, mod)) { + preempt_enable(); + continue; + } + if (!try_module_get(mod)) + err = -EINVAL; + preempt_enable(); + if (err) + break; + err = add_module(&arr, mod); + if (err) { + module_put(mod); + break; + } + } /* We return either err < 0 in case of error, ... */ - err = module_kallsyms_on_each_symbol(NULL, module_callback, &args); if (err) { - kprobe_multi_put_modules(args.mods, args.mods_cnt); - kfree(args.mods); + kprobe_multi_put_modules(arr.mods, arr.mods_cnt); + kfree(arr.mods); return err; } /* or number of modules found if everything is ok. */ - *mods = args.mods; - return args.mods_cnt; + *mods = arr.mods; + return arr.mods_cnt; } int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) @@ -2857,13 +2865,6 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr bpf_kprobe_multi_cookie_cmp, bpf_kprobe_multi_cookie_swap, link); - } else { - /* - * We need to sort addrs array even if there are no cookies - * provided, to allow bsearch in get_modules_for_addrs. - */ - sort(addrs, cnt, sizeof(*addrs), - bpf_kprobe_multi_addrs_cmp, NULL); } err = get_modules_for_addrs(&link->mods, addrs, cnt);
We currently use module_kallsyms_on_each_symbol that iterates all modules/symbols and we try to lookup each such address in user provided symbols/addresses to get list of used modules. This fix instead only iterates provided kprobe addresses and calls __module_address on each to get list of used modules. This turned out to be simpler and also a bit faster. On my setup with workload (executed 10 times): # test_progs -t kprobe_multi_bench_attach/modules Current code: Performance counter stats for './test.sh' (5 runs): 76,081,161,596 cycles:k ( +- 0.47% ) 18.3867 +- 0.0992 seconds time elapsed ( +- 0.54% ) With the fix: Performance counter stats for './test.sh' (5 runs): 74,079,889,063 cycles:k ( +- 0.04% ) 17.8514 +- 0.0218 seconds time elapsed ( +- 0.12% ) Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- kernel/trace/bpf_trace.c | 93 ++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 46 deletions(-)