Message ID | 20230113143303.867580-4-jolsa@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | kallsyms: Optimize the search for module symbols by livepatch and bpf | expand |
On Fri, Jan 13, 2023 at 6:44 AM Jiri Olsa <jolsa@kernel.org> wrote: > > We currently use module_kallsyms_on_each_symbol that iterates all > modules/symbols and we try to lookup each such address in user > provided symbols/addresses to get list of used modules. > > This fix instead only iterates provided kprobe addresses and calls > __module_address on each to get list of used modules. This turned > out to be simpler and also bit faster. > > On my setup with workload being (executed 10 times): > > # test_progs -t kprobe_multi_bench_attach_module > > Current code: > > Performance counter stats for './test.sh' (5 runs): > > 76,081,161,596 cycles:k ( +- 0.47% ) > > 18.3867 +- 0.0992 seconds time elapsed ( +- 0.54% ) > > With the fix: > > Performance counter stats for './test.sh' (5 runs): > > 74,079,889,063 cycles:k ( +- 0.04% ) > > 17.8514 +- 0.0218 seconds time elapsed ( +- 0.12% ) > > Signed-off-by: Jiri Olsa <jolsa@kernel.org> > --- > kernel/trace/bpf_trace.c | 95 +++++++++++++++++++++------------------- > 1 file changed, 49 insertions(+), 46 deletions(-) > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index 095f7f8d34a1..90c5d5026831 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -2682,69 +2682,79 @@ static void symbols_swap_r(void *a, void *b, int size, const void *priv) > } > } > > -struct module_addr_args { > - unsigned long *addrs; > - u32 addrs_cnt; > +struct modules_array { > struct module **mods; > int mods_cnt; > int mods_cap; > }; > > -static int module_callback(void *data, const char *name, > - struct module *mod, unsigned long addr) > +static int add_module(struct modules_array *arr, struct module *mod) > { > - struct module_addr_args *args = data; > struct module **mods; > > - /* We iterate all modules symbols and for each we: > - * - search for it in provided addresses array > - * - if found we check if we already have the module pointer stored > - * (we iterate modules sequentially, so we can 
check just the last > - * module pointer) > - * - take module reference and store it > - */ > - if (!bsearch(&addr, args->addrs, args->addrs_cnt, sizeof(addr), > - bpf_kprobe_multi_addrs_cmp)) > - return 0; > - > - if (args->mods && args->mods[args->mods_cnt - 1] == mod) > - return 0; > - > - if (args->mods_cnt == args->mods_cap) { > - args->mods_cap = max(16, args->mods_cap * 3 / 2); > - mods = krealloc_array(args->mods, args->mods_cap, sizeof(*mods), GFP_KERNEL); > + if (arr->mods_cnt == arr->mods_cap) { > + arr->mods_cap = max(16, arr->mods_cap * 3 / 2); > + mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL); > if (!mods) > return -ENOMEM; > - args->mods = mods; > + arr->mods = mods; > } > > - if (!try_module_get(mod)) > - return -EINVAL; > - > - args->mods[args->mods_cnt] = mod; > - args->mods_cnt++; > + arr->mods[arr->mods_cnt] = mod; > + arr->mods_cnt++; > return 0; > } > > +static bool has_module(struct modules_array *arr, struct module *mod) > +{ > + int i; > + > + if (!arr->mods) > + return false; > + for (i = arr->mods_cnt; i >= 0; i--) { This should be "i = arr->mods_cnt - 1", no? > + if (arr->mods[i] == mod) > + return true; > + } > + return false; > +} > + > static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt) > { > - struct module_addr_args args = { > - .addrs = addrs, > - .addrs_cnt = addrs_cnt, > - }; > - int err; > + struct modules_array arr = {}; > + u32 i, err = 0; > + > + for (i = 0; i < addrs_cnt; i++) { > + struct module *mod; > + > + preempt_disable(); > + mod = __module_address(addrs[i]); > + /* Either no module or we it's already stored */ > + if (!mod || (mod && has_module(&arr, mod))) { nit: This can be simplified: if (!mod || has_module(&arr, mod)) { > + preempt_enable(); > + continue; > + } [...]
On Fri, Jan 13, 2023 at 12:41:50PM -0800, Song Liu wrote: > On Fri, Jan 13, 2023 at 6:44 AM Jiri Olsa <jolsa@kernel.org> wrote: > > > > We currently use module_kallsyms_on_each_symbol that iterates all > > modules/symbols and we try to lookup each such address in user > > provided symbols/addresses to get list of used modules. > > > > This fix instead only iterates provided kprobe addresses and calls > > __module_address on each to get list of used modules. This turned > > out to be simpler and also bit faster. > > > > On my setup with workload being (executed 10 times): > > > > # test_progs -t kprobe_multi_bench_attach_module > > > > Current code: > > > > Performance counter stats for './test.sh' (5 runs): > > > > 76,081,161,596 cycles:k ( +- 0.47% ) > > > > 18.3867 +- 0.0992 seconds time elapsed ( +- 0.54% ) > > > > With the fix: > > > > Performance counter stats for './test.sh' (5 runs): > > > > 74,079,889,063 cycles:k ( +- 0.04% ) > > > > 17.8514 +- 0.0218 seconds time elapsed ( +- 0.12% ) > > > > Signed-off-by: Jiri Olsa <jolsa@kernel.org> > > --- > > kernel/trace/bpf_trace.c | 95 +++++++++++++++++++++------------------- > > 1 file changed, 49 insertions(+), 46 deletions(-) > > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > > index 095f7f8d34a1..90c5d5026831 100644 > > --- a/kernel/trace/bpf_trace.c > > +++ b/kernel/trace/bpf_trace.c > > @@ -2682,69 +2682,79 @@ static void symbols_swap_r(void *a, void *b, int size, const void *priv) > > } > > } > > > > -struct module_addr_args { > > - unsigned long *addrs; > > - u32 addrs_cnt; > > +struct modules_array { > > struct module **mods; > > int mods_cnt; > > int mods_cap; > > }; > > > > -static int module_callback(void *data, const char *name, > > - struct module *mod, unsigned long addr) > > +static int add_module(struct modules_array *arr, struct module *mod) > > { > > - struct module_addr_args *args = data; > > struct module **mods; > > > > - /* We iterate all modules symbols and for each 
we: > > - * - search for it in provided addresses array > > - * - if found we check if we already have the module pointer stored > > - * (we iterate modules sequentially, so we can check just the last > > - * module pointer) > > - * - take module reference and store it > > - */ > > - if (!bsearch(&addr, args->addrs, args->addrs_cnt, sizeof(addr), > > - bpf_kprobe_multi_addrs_cmp)) > > - return 0; > > - > > - if (args->mods && args->mods[args->mods_cnt - 1] == mod) > > - return 0; > > - > > - if (args->mods_cnt == args->mods_cap) { > > - args->mods_cap = max(16, args->mods_cap * 3 / 2); > > - mods = krealloc_array(args->mods, args->mods_cap, sizeof(*mods), GFP_KERNEL); > > + if (arr->mods_cnt == arr->mods_cap) { > > + arr->mods_cap = max(16, arr->mods_cap * 3 / 2); > > + mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL); > > if (!mods) > > return -ENOMEM; > > - args->mods = mods; > > + arr->mods = mods; > > } > > > > - if (!try_module_get(mod)) > > - return -EINVAL; > > - > > - args->mods[args->mods_cnt] = mod; > > - args->mods_cnt++; > > + arr->mods[arr->mods_cnt] = mod; > > + arr->mods_cnt++; > > return 0; > > } > > > > +static bool has_module(struct modules_array *arr, struct module *mod) > > +{ > > + int i; > > + > > + if (!arr->mods) > > + return false; > > + for (i = arr->mods_cnt; i >= 0; i--) { > > This should be "i = arr->mods_cnt - 1", no? 
right > > > + if (arr->mods[i] == mod) > > + return true; > > + } > > + return false; > > +} > > + > > static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt) > > { > > - struct module_addr_args args = { > > - .addrs = addrs, > > - .addrs_cnt = addrs_cnt, > > - }; > > - int err; > > + struct modules_array arr = {}; > > + u32 i, err = 0; > > + > > + for (i = 0; i < addrs_cnt; i++) { > > + struct module *mod; > > + > > + preempt_disable(); > > + mod = __module_address(addrs[i]); > > + /* Either no module or we it's already stored */ > > + if (!mod || (mod && has_module(&arr, mod))) { > > nit: This can be simplified: > > if (!mod || has_module(&arr, mod)) { yep, will change thanks, jirka
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 095f7f8d34a1..90c5d5026831 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2682,69 +2682,79 @@ static void symbols_swap_r(void *a, void *b, int size, const void *priv) } } -struct module_addr_args { - unsigned long *addrs; - u32 addrs_cnt; +struct modules_array { struct module **mods; int mods_cnt; int mods_cap; }; -static int module_callback(void *data, const char *name, - struct module *mod, unsigned long addr) +static int add_module(struct modules_array *arr, struct module *mod) { - struct module_addr_args *args = data; struct module **mods; - /* We iterate all modules symbols and for each we: - * - search for it in provided addresses array - * - if found we check if we already have the module pointer stored - * (we iterate modules sequentially, so we can check just the last - * module pointer) - * - take module reference and store it - */ - if (!bsearch(&addr, args->addrs, args->addrs_cnt, sizeof(addr), - bpf_kprobe_multi_addrs_cmp)) - return 0; - - if (args->mods && args->mods[args->mods_cnt - 1] == mod) - return 0; - - if (args->mods_cnt == args->mods_cap) { - args->mods_cap = max(16, args->mods_cap * 3 / 2); - mods = krealloc_array(args->mods, args->mods_cap, sizeof(*mods), GFP_KERNEL); + if (arr->mods_cnt == arr->mods_cap) { + arr->mods_cap = max(16, arr->mods_cap * 3 / 2); + mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL); if (!mods) return -ENOMEM; - args->mods = mods; + arr->mods = mods; } - if (!try_module_get(mod)) - return -EINVAL; - - args->mods[args->mods_cnt] = mod; - args->mods_cnt++; + arr->mods[arr->mods_cnt] = mod; + arr->mods_cnt++; return 0; } +static bool has_module(struct modules_array *arr, struct module *mod) +{ + int i; + + if (!arr->mods) + return false; + for (i = arr->mods_cnt; i >= 0; i--) { + if (arr->mods[i] == mod) + return true; + } + return false; +} + static int get_modules_for_addrs(struct module ***mods, 
unsigned long *addrs, u32 addrs_cnt) { - struct module_addr_args args = { - .addrs = addrs, - .addrs_cnt = addrs_cnt, - }; - int err; + struct modules_array arr = {}; + u32 i, err = 0; + + for (i = 0; i < addrs_cnt; i++) { + struct module *mod; + + preempt_disable(); + mod = __module_address(addrs[i]); + /* Either no module or we it's already stored */ + if (!mod || (mod && has_module(&arr, mod))) { + preempt_enable(); + continue; + } + if (!try_module_get(mod)) + err = -EINVAL; + preempt_enable(); + if (err) + break; + err = add_module(&arr, mod); + if (err) { + module_put(mod); + break; + } + } /* We return either err < 0 in case of error, ... */ - err = module_kallsyms_on_each_symbol(NULL, module_callback, &args); if (err) { - kprobe_multi_put_modules(args.mods, args.mods_cnt); - kfree(args.mods); + kprobe_multi_put_modules(arr.mods, arr.mods_cnt); + kfree(arr.mods); return err; } /* or number of modules found if everything is ok. */ - *mods = args.mods; - return args.mods_cnt; + *mods = arr.mods; + return arr.mods_cnt; } int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) @@ -2857,13 +2867,6 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr bpf_kprobe_multi_cookie_cmp, bpf_kprobe_multi_cookie_swap, link); - } else { - /* - * We need to sort addrs array even if there are no cookies - * provided, to allow bsearch in get_modules_for_addrs. - */ - sort(addrs, cnt, sizeof(*addrs), - bpf_kprobe_multi_addrs_cmp, NULL); } err = get_modules_for_addrs(&link->mods, addrs, cnt);
We currently use module_kallsyms_on_each_symbol that iterates all modules/symbols and we try to look up each such address in user provided symbols/addresses to get a list of used modules. This fix instead only iterates provided kprobe addresses and calls __module_address on each to get a list of used modules. This turned out to be simpler and also a bit faster. On my setup with workload being (executed 10 times): # test_progs -t kprobe_multi_bench_attach_module Current code: Performance counter stats for './test.sh' (5 runs): 76,081,161,596 cycles:k ( +- 0.47% ) 18.3867 +- 0.0992 seconds time elapsed ( +- 0.54% ) With the fix: Performance counter stats for './test.sh' (5 runs): 74,079,889,063 cycles:k ( +- 0.04% ) 17.8514 +- 0.0218 seconds time elapsed ( +- 0.12% ) Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- kernel/trace/bpf_trace.c | 95 +++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 46 deletions(-)