Message ID | 20201126165748.1748417-1-revest@google.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | [bpf-next,1/2] bpf: Add a bpf_kallsyms_lookup helper | expand |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Clearly marked for bpf-next |
netdev/subject_prefix | success | Link |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | fail | Errors and warnings before: 15753 this patch: 15754 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | warning | WARNING: From:/Signed-off-by: email address mismatch: 'From: Florent Revest <revest@chromium.org>' != 'Signed-off-by: Florent Revest <revest@google.com>' WARNING: line length of 102 exceeds 80 columns |
netdev/build_allmodconfig_warn | fail | Errors and warnings before: 15665 this patch: 15666 |
netdev/header_inline | success | Link |
netdev/stable | success | Stable not CCed |
[...] > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index c3458ec1f30a..670998635eac 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -3817,6 +3817,21 @@ union bpf_attr { > * The **hash_algo** is returned on success, > * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if > * invalid arguments are passed. > + * > + * long bpf_kallsyms_lookup(u64 address, char *symbol, u32 symbol_size, char *module, u32 module_size) > + * Description > + * Uses kallsyms to write the name of the symbol at *address* > + * into *symbol* of size *symbol_sz*. This is guaranteed to be > + * zero terminated. > + * If the symbol is in a module, up to *module_size* bytes of > + * the module name is written in *module*. This is also > + * guaranteed to be zero-terminated. Note: a module name > + * is always shorter than 64 bytes. > + * Return > + * On success, the strictly positive length of the full symbol > + * name, If this is greater than *symbol_size*, the written > + * symbol is truncated. > + * On error, a negative value. 
> */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -3981,6 +3996,7 @@ union bpf_attr { > FN(bprm_opts_set), \ > FN(ktime_get_coarse_ns), \ > FN(ima_inode_hash), \ > + FN(kallsyms_lookup), \ > /* */ > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index d255bc9b2bfa..9d86e20c2b13 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -17,6 +17,7 @@ > #include <linux/error-injection.h> > #include <linux/btf_ids.h> > #include <linux/bpf_lsm.h> > +#include <linux/kallsyms.h> > > #include <net/bpf_sk_storage.h> > > @@ -1260,6 +1261,44 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { > .arg5_type = ARG_ANYTHING, > }; > > +BPF_CALL_5(bpf_kallsyms_lookup, u64, address, char *, symbol, u32, symbol_size, > + char *, module, u32, module_size) > +{ > + char buffer[KSYM_SYMBOL_LEN]; > + unsigned long offset, size; > + const char *name; > + char *modname; > + long ret; > + > + name = kallsyms_lookup(address, &size, &offset, &modname, buffer); > + if (!name) > + return -EINVAL; > + > + ret = strlen(name) + 1; > + if (symbol_size) { > + strncpy(symbol, name, symbol_size); > + symbol[symbol_size - 1] = '\0'; > + } > + > + if (modname && module_size) { > + strncpy(module, modname, module_size); The return value does not seem to be impacted by the truncation of the module name, I wonder if it is better to just use a single buffer. For example, the proc kallsyms shows symbols as: <symbol_name> [module_name] https://github.com/torvalds/linux/blob/master/kernel/kallsyms.c#L648 The square brackets do seem to be a waste here, so maybe we could use a single character as a separator? 
> + module[module_size - 1] = '\0'; > + } > + > + return ret; > +} > + > +const struct bpf_func_proto bpf_kallsyms_lookup_proto = { > + .func = bpf_kallsyms_lookup, > + .gpl_only = false, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_ANYTHING, > + .arg2_type = ARG_PTR_TO_MEM, > + .arg3_type = ARG_CONST_SIZE, > + .arg4_type = ARG_PTR_TO_MEM, > + .arg5_type = ARG_CONST_SIZE, > +}; > + [...]
On 11/26/20 8:57 AM, Florent Revest wrote: > This helper exposes the kallsyms_lookup function to eBPF tracing > programs. This can be used to retrieve the name of the symbol at an > address. For example, when hooking into nf_register_net_hook, one can > audit the name of the registered netfilter hook and potentially also > the name of the module in which the symbol is located. > > Signed-off-by: Florent Revest <revest@google.com> > --- > include/uapi/linux/bpf.h | 16 +++++++++++++ > kernel/trace/bpf_trace.c | 41 ++++++++++++++++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 16 +++++++++++++ > 3 files changed, 73 insertions(+) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index c3458ec1f30a..670998635eac 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -3817,6 +3817,21 @@ union bpf_attr { > * The **hash_algo** is returned on success, > * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if > * invalid arguments are passed. > + * > + * long bpf_kallsyms_lookup(u64 address, char *symbol, u32 symbol_size, char *module, u32 module_size) > + * Description > + * Uses kallsyms to write the name of the symbol at *address* > + * into *symbol* of size *symbol_sz*. This is guaranteed to be > + * zero terminated. > + * If the symbol is in a module, up to *module_size* bytes of > + * the module name is written in *module*. This is also > + * guaranteed to be zero-terminated. Note: a module name > + * is always shorter than 64 bytes. > + * Return > + * On success, the strictly positive length of the full symbol > + * name, If this is greater than *symbol_size*, the written > + * symbol is truncated. > + * On error, a negative value. 
> */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -3981,6 +3996,7 @@ union bpf_attr { > FN(bprm_opts_set), \ > FN(ktime_get_coarse_ns), \ > FN(ima_inode_hash), \ > + FN(kallsyms_lookup), \ > /* */ > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index d255bc9b2bfa..9d86e20c2b13 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -17,6 +17,7 @@ > #include <linux/error-injection.h> > #include <linux/btf_ids.h> > #include <linux/bpf_lsm.h> > +#include <linux/kallsyms.h> > > #include <net/bpf_sk_storage.h> > > @@ -1260,6 +1261,44 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { > .arg5_type = ARG_ANYTHING, > }; > > +BPF_CALL_5(bpf_kallsyms_lookup, u64, address, char *, symbol, u32, symbol_size, > + char *, module, u32, module_size) > +{ > + char buffer[KSYM_SYMBOL_LEN]; > + unsigned long offset, size; > + const char *name; > + char *modname; > + long ret; > + > + name = kallsyms_lookup(address, &size, &offset, &modname, buffer); > + if (!name) > + return -EINVAL; > + > + ret = strlen(name) + 1; > + if (symbol_size) { > + strncpy(symbol, name, symbol_size); > + symbol[symbol_size - 1] = '\0'; > + } > + > + if (modname && module_size) { > + strncpy(module, modname, module_size); > + module[module_size - 1] = '\0'; In this case, module name may be truncated and user did not get any indication from return value. In the helper description, it is mentioned that module name currently is most 64 bytes. But from UAPI perspective, it may be still good to return something to let user know the name is truncated. I do not know what is the best way to do this. One suggestion is to break it into two helpers, one for symbol name and another for module name. What is the use cases people want to get both symbol name and module name and is it common? 
> + } > + > + return ret; > +} > + > +const struct bpf_func_proto bpf_kallsyms_lookup_proto = { > + .func = bpf_kallsyms_lookup, > + .gpl_only = false, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_ANYTHING, > + .arg2_type = ARG_PTR_TO_MEM, ARG_PTR_TO_UNINIT_MEM? > + .arg3_type = ARG_CONST_SIZE, ARG_CONST_SIZE_OR_ZERO? This is especially true for current format which tries to return both symbol name and module name and user may just want to do one of them. > + .arg4_type = ARG_PTR_TO_MEM, ARG_PTR_TO_UNINIT_MEM? > + .arg5_type = ARG_CONST_SIZE, ARG_CONST_SIZE_OR_ZERO? > +}; > + > const struct bpf_func_proto * > bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > { > @@ -1356,6 +1395,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > return &bpf_per_cpu_ptr_proto; > case BPF_FUNC_bpf_this_cpu_ptr: > return &bpf_this_cpu_ptr_proto; > + case BPF_FUNC_kallsyms_lookup: > + return &bpf_kallsyms_lookup_proto; > default: > return NULL; > } [...]
On Thu, 2020-11-26 at 23:35 -0800, Yonghong Song wrote: > On 11/26/20 8:57 AM, Florent Revest wrote: > > +BPF_CALL_5(bpf_kallsyms_lookup, u64, address, char *, symbol, u32, > > symbol_size, > > + char *, module, u32, module_size) > > +{ > > + char buffer[KSYM_SYMBOL_LEN]; > > + unsigned long offset, size; > > + const char *name; > > + char *modname; > > + long ret; > > + > > + name = kallsyms_lookup(address, &size, &offset, &modname, > > buffer); > > + if (!name) > > + return -EINVAL; > > + > > + ret = strlen(name) + 1; > > + if (symbol_size) { > > + strncpy(symbol, name, symbol_size); > > + symbol[symbol_size - 1] = '\0'; > > + } > > + > > + if (modname && module_size) { > > + strncpy(module, modname, module_size); > > + module[module_size - 1] = '\0'; > > In this case, module name may be truncated and user did not get any > indication from return value. In the helper description, it is > mentioned that module name currently is most 64 bytes. But from UAPI > perspective, it may be still good to return something to let user > know the name is truncated. > > I do not know what is the best way to do this. One suggestion is > to break it into two helpers, one for symbol name and another > for module name. What is the use cases people want to get both > symbol name and module name and is it common? Fair, I can split this into two helpers :) The lookup would be done twice but I don't think that's a big deal.
On Fri, 2020-11-27 at 03:32 +0100, KP Singh wrote: > > + ret = strlen(name) + 1; > > + if (symbol_size) { > > + strncpy(symbol, name, symbol_size); > > + symbol[symbol_size - 1] = '\0'; > > + } > > + > > + if (modname && module_size) { > > + strncpy(module, modname, module_size); > > The return value does not seem to be impacted by the truncation of > the module name, I wonder if it is better to just use a single > buffer. > > For example, the proc kallsyms shows symbols as: > > <symbol_name> [module_name] > > https://github.com/torvalds/linux/blob/master/kernel/kallsyms.c#L648 > > The square brackets do seem to be a waste here, so maybe we could use > a single character as a separator? I prefer Yongonhong's suggestion of having two helpers. This gives more control to the BPF program. For example, they could decide to audit only addresses coming from a module, and that would be easier to do with two helpers than by parsing a string in BPF.
On Fri, 2020-11-27 at 10:25 +0100, Florent Revest wrote:
> I prefer Yongonhong's suggestion of having two helpers.
Argh! I hit enter too fast! Yonghong*, sorry :|
On Fri, Nov 27, 2020 at 8:35 AM Yonghong Song <yhs@fb.com> wrote: > > > > On 11/26/20 8:57 AM, Florent Revest wrote: > > This helper exposes the kallsyms_lookup function to eBPF tracing > > programs. This can be used to retrieve the name of the symbol at an > > address. For example, when hooking into nf_register_net_hook, one can > > audit the name of the registered netfilter hook and potentially also > > the name of the module in which the symbol is located. > > > > Signed-off-by: Florent Revest <revest@google.com> > > --- > > include/uapi/linux/bpf.h | 16 +++++++++++++ > > kernel/trace/bpf_trace.c | 41 ++++++++++++++++++++++++++++++++++ > > tools/include/uapi/linux/bpf.h | 16 +++++++++++++ > > 3 files changed, 73 insertions(+) > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index c3458ec1f30a..670998635eac 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h > > @@ -3817,6 +3817,21 @@ union bpf_attr { > > * The **hash_algo** is returned on success, > > * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if > > * invalid arguments are passed. > > + * > > + * long bpf_kallsyms_lookup(u64 address, char *symbol, u32 symbol_size, char *module, u32 module_size) > > + * Description > > + * Uses kallsyms to write the name of the symbol at *address* > > + * into *symbol* of size *symbol_sz*. This is guaranteed to be > > + * zero terminated. > > + * If the symbol is in a module, up to *module_size* bytes of > > + * the module name is written in *module*. This is also > > + * guaranteed to be zero-terminated. Note: a module name > > + * is always shorter than 64 bytes. > > + * Return > > + * On success, the strictly positive length of the full symbol > > + * name, If this is greater than *symbol_size*, the written > > + * symbol is truncated. > > + * On error, a negative value. 
> > */ > > #define __BPF_FUNC_MAPPER(FN) \ > > FN(unspec), \ > > @@ -3981,6 +3996,7 @@ union bpf_attr { > > FN(bprm_opts_set), \ > > FN(ktime_get_coarse_ns), \ > > FN(ima_inode_hash), \ > > + FN(kallsyms_lookup), \ > > /* */ > > > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > > index d255bc9b2bfa..9d86e20c2b13 100644 > > --- a/kernel/trace/bpf_trace.c > > +++ b/kernel/trace/bpf_trace.c > > @@ -17,6 +17,7 @@ > > #include <linux/error-injection.h> > > #include <linux/btf_ids.h> > > #include <linux/bpf_lsm.h> > > +#include <linux/kallsyms.h> > > > > #include <net/bpf_sk_storage.h> > > > > @@ -1260,6 +1261,44 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { > > .arg5_type = ARG_ANYTHING, > > }; > > > > +BPF_CALL_5(bpf_kallsyms_lookup, u64, address, char *, symbol, u32, symbol_size, > > + char *, module, u32, module_size) > > +{ > > + char buffer[KSYM_SYMBOL_LEN]; > > + unsigned long offset, size; > > + const char *name; > > + char *modname; > > + long ret; > > + > > + name = kallsyms_lookup(address, &size, &offset, &modname, buffer); > > + if (!name) > > + return -EINVAL; > > + > > + ret = strlen(name) + 1; > > + if (symbol_size) { > > + strncpy(symbol, name, symbol_size); > > + symbol[symbol_size - 1] = '\0'; > > + } > > + > > + if (modname && module_size) { > > + strncpy(module, modname, module_size); > > + module[module_size - 1] = '\0'; > > In this case, module name may be truncated and user did not get any > indication from return value. In the helper description, it is mentioned > that module name currently is most 64 bytes. But from UAPI perspective, > it may be still good to return something to let user know the name > is truncated. > > I do not know what is the best way to do this. One suggestion is > to break it into two helpers, one for symbol name and another I think it would be slightly preferable to have one helper though. 
maybe something like bpf_get_symbol_info (better names anyone? :)) with flags to get the module name or the symbol name depending on the flag? > for module name. What is the use cases people want to get both > symbol name and module name and is it common? The use case would be to disambiguate symbols in the kernel from the ones from a kernel module. Similar to what /proc/kallsyms does: T cpufreq_gov_powersave_init [cpufreq_powersave] > > > + } > > + > > + return ret; > > +} > > + > > +const struct bpf_func_proto bpf_kallsyms_lookup_proto = { > > + .func = bpf_kallsyms_lookup, > > + .gpl_only = false, > > + .ret_type = RET_INTEGER, > > + .arg1_type = ARG_ANYTHING, > > + .arg2_type = ARG_PTR_TO_MEM, > ARG_PTR_TO_UNINIT_MEM? > > > + .arg3_type = ARG_CONST_SIZE, > ARG_CONST_SIZE_OR_ZERO? This is especially true for current format > which tries to return both symbol name and module name and > user may just want to do one of them. > > > + .arg4_type = ARG_PTR_TO_MEM, > ARG_PTR_TO_UNINIT_MEM? > > > + .arg5_type = ARG_CONST_SIZE, > ARG_CONST_SIZE_OR_ZERO? > > > +}; > > + > > const struct bpf_func_proto * > > bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > > { > > @@ -1356,6 +1395,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > > return &bpf_per_cpu_ptr_proto; > > case BPF_FUNC_bpf_this_cpu_ptr: > > return &bpf_this_cpu_ptr_proto; > > + case BPF_FUNC_kallsyms_lookup: > > + return &bpf_kallsyms_lookup_proto; > > default: > > return NULL; > > } > [...]
On 11/27/20 3:20 AM, KP Singh wrote: > On Fri, Nov 27, 2020 at 8:35 AM Yonghong Song <yhs@fb.com> wrote: >> >> >> >> On 11/26/20 8:57 AM, Florent Revest wrote: >>> This helper exposes the kallsyms_lookup function to eBPF tracing >>> programs. This can be used to retrieve the name of the symbol at an >>> address. For example, when hooking into nf_register_net_hook, one can >>> audit the name of the registered netfilter hook and potentially also >>> the name of the module in which the symbol is located. >>> >>> Signed-off-by: Florent Revest <revest@google.com> >>> --- >>> include/uapi/linux/bpf.h | 16 +++++++++++++ >>> kernel/trace/bpf_trace.c | 41 ++++++++++++++++++++++++++++++++++ >>> tools/include/uapi/linux/bpf.h | 16 +++++++++++++ >>> 3 files changed, 73 insertions(+) >>> >>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >>> index c3458ec1f30a..670998635eac 100644 >>> --- a/include/uapi/linux/bpf.h >>> +++ b/include/uapi/linux/bpf.h >>> @@ -3817,6 +3817,21 @@ union bpf_attr { >>> * The **hash_algo** is returned on success, >>> * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if >>> * invalid arguments are passed. >>> + * >>> + * long bpf_kallsyms_lookup(u64 address, char *symbol, u32 symbol_size, char *module, u32 module_size) >>> + * Description >>> + * Uses kallsyms to write the name of the symbol at *address* >>> + * into *symbol* of size *symbol_sz*. This is guaranteed to be >>> + * zero terminated. >>> + * If the symbol is in a module, up to *module_size* bytes of >>> + * the module name is written in *module*. This is also >>> + * guaranteed to be zero-terminated. Note: a module name >>> + * is always shorter than 64 bytes. >>> + * Return >>> + * On success, the strictly positive length of the full symbol >>> + * name, If this is greater than *symbol_size*, the written >>> + * symbol is truncated. >>> + * On error, a negative value. 
>>> */ >>> #define __BPF_FUNC_MAPPER(FN) \ >>> FN(unspec), \ >>> @@ -3981,6 +3996,7 @@ union bpf_attr { >>> FN(bprm_opts_set), \ >>> FN(ktime_get_coarse_ns), \ >>> FN(ima_inode_hash), \ >>> + FN(kallsyms_lookup), \ >>> /* */ >>> >>> /* integer value in 'imm' field of BPF_CALL instruction selects which helper >>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c >>> index d255bc9b2bfa..9d86e20c2b13 100644 >>> --- a/kernel/trace/bpf_trace.c >>> +++ b/kernel/trace/bpf_trace.c >>> @@ -17,6 +17,7 @@ >>> #include <linux/error-injection.h> >>> #include <linux/btf_ids.h> >>> #include <linux/bpf_lsm.h> >>> +#include <linux/kallsyms.h> >>> >>> #include <net/bpf_sk_storage.h> >>> >>> @@ -1260,6 +1261,44 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { >>> .arg5_type = ARG_ANYTHING, >>> }; >>> >>> +BPF_CALL_5(bpf_kallsyms_lookup, u64, address, char *, symbol, u32, symbol_size, >>> + char *, module, u32, module_size) >>> +{ >>> + char buffer[KSYM_SYMBOL_LEN]; >>> + unsigned long offset, size; >>> + const char *name; >>> + char *modname; >>> + long ret; >>> + >>> + name = kallsyms_lookup(address, &size, &offset, &modname, buffer); >>> + if (!name) >>> + return -EINVAL; >>> + >>> + ret = strlen(name) + 1; >>> + if (symbol_size) { >>> + strncpy(symbol, name, symbol_size); >>> + symbol[symbol_size - 1] = '\0'; >>> + } >>> + >>> + if (modname && module_size) { >>> + strncpy(module, modname, module_size); >>> + module[module_size - 1] = '\0'; >> >> In this case, module name may be truncated and user did not get any >> indication from return value. In the helper description, it is mentioned >> that module name currently is most 64 bytes. But from UAPI perspective, >> it may be still good to return something to let user know the name >> is truncated. >> >> I do not know what is the best way to do this. One suggestion is >> to break it into two helpers, one for symbol name and another > > I think it would be slightly preferable to have one helper though. 
> maybe something like bpf_get_symbol_info (better names anyone? :)) > with flags to get the module name or the symbol name depending > on the flag? This works even better. Previously I am thinking if we have two helpers, we can add flags for each of them for future extension. But we can certainly have just one helper with flags to indicate whether this is for module name or for symbol name or something else. The buffer can be something like union bpf_ksymbol_info { char module_name[]; char symbol_name[]; ... } and flags will indicate what information user wants. > >> for module name. What is the use cases people want to get both >> symbol name and module name and is it common? > > The use case would be to disambiguate symbols in the > kernel from the ones from a kernel module. Similar to what > /proc/kallsyms does: > > T cpufreq_gov_powersave_init [cpufreq_powersave] > >> >>> + } >>> + >>> + return ret; >>> +} >>> + >>> +const struct bpf_func_proto bpf_kallsyms_lookup_proto = { >>> + .func = bpf_kallsyms_lookup, >>> + .gpl_only = false, >>> + .ret_type = RET_INTEGER, >>> + .arg1_type = ARG_ANYTHING, >>> + .arg2_type = ARG_PTR_TO_MEM, >> ARG_PTR_TO_UNINIT_MEM? >> >>> + .arg3_type = ARG_CONST_SIZE, >> ARG_CONST_SIZE_OR_ZERO? This is especially true for current format >> which tries to return both symbol name and module name and >> user may just want to do one of them. >> >>> + .arg4_type = ARG_PTR_TO_MEM, >> ARG_PTR_TO_UNINIT_MEM? >> >>> + .arg5_type = ARG_CONST_SIZE, >> ARG_CONST_SIZE_OR_ZERO? >> >>> +}; >>> + >>> const struct bpf_func_proto * >>> bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) >>> { >>> @@ -1356,6 +1395,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) >>> return &bpf_per_cpu_ptr_proto; >>> case BPF_FUNC_bpf_this_cpu_ptr: >>> return &bpf_this_cpu_ptr_proto; >>> + case BPF_FUNC_kallsyms_lookup: >>> + return &bpf_kallsyms_lookup_proto; >>> default: >>> return NULL; >>> } >> [...]
Hi Florent,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on bpf-next/master]
url: https://github.com/0day-ci/linux/commits/Florent-Revest/bpf-Add-a-bpf_kallsyms_lookup-helper/20201127-010044
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: i386-randconfig-s002-20201127 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.3-151-g540c2c4b-dirty
# https://github.com/0day-ci/linux/commit/5ddc01183fb25936551dbf6d0b875f8d75dccdf3
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Florent-Revest/bpf-Add-a-bpf_kallsyms_lookup-helper/20201127-010044
git checkout 5ddc01183fb25936551dbf6d0b875f8d75dccdf3
# save the attached .config to linux build tree
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386
If you fix the issue, kindly add the following tag as appropriate:
Reported-by: kernel test robot <lkp@intel.com>
"sparse warnings: (new ones prefixed by >>)"
>> kernel/trace/bpf_trace.c:1291:29: sparse: sparse: symbol 'bpf_kallsyms_lookup_proto' was not declared. Should it be static?
Please review and possibly fold the followup patch.
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On Thu, Nov 26, 2020 at 05:57:47PM +0100, Florent Revest wrote: > This helper exposes the kallsyms_lookup function to eBPF tracing > programs. This can be used to retrieve the name of the symbol at an > address. For example, when hooking into nf_register_net_hook, one can > audit the name of the registered netfilter hook and potentially also > the name of the module in which the symbol is located. > > Signed-off-by: Florent Revest <revest@google.com> > --- > include/uapi/linux/bpf.h | 16 +++++++++++++ > kernel/trace/bpf_trace.c | 41 ++++++++++++++++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 16 +++++++++++++ > 3 files changed, 73 insertions(+) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index c3458ec1f30a..670998635eac 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -3817,6 +3817,21 @@ union bpf_attr { > * The **hash_algo** is returned on success, > * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if > * invalid arguments are passed. > + * > + * long bpf_kallsyms_lookup(u64 address, char *symbol, u32 symbol_size, char *module, u32 module_size) > + * Description > + * Uses kallsyms to write the name of the symbol at *address* > + * into *symbol* of size *symbol_sz*. This is guaranteed to be > + * zero terminated. > + * If the symbol is in a module, up to *module_size* bytes of > + * the module name is written in *module*. This is also > + * guaranteed to be zero-terminated. Note: a module name > + * is always shorter than 64 bytes. > + * Return > + * On success, the strictly positive length of the full symbol > + * name, If this is greater than *symbol_size*, the written > + * symbol is truncated. > + * On error, a negative value. Looks like debug-only helper. I cannot think of a way to use in production code. What program suppose to do with that string? Do string compare? BPF side doesn't have a good way to do string manipulations. 
If you really need to print a symbolic name for a given address, I'd rather extend bpf_trace_printk() to support %pS.
On Sat, 2020-11-28 at 17:07 -0800, Alexei Starovoitov wrote: > On Thu, Nov 26, 2020 at 05:57:47PM +0100, Florent Revest wrote: > > This helper exposes the kallsyms_lookup function to eBPF tracing > > programs. This can be used to retrieve the name of the symbol at an > > address. For example, when hooking into nf_register_net_hook, one > > can > > audit the name of the registered netfilter hook and potentially > > also > > the name of the module in which the symbol is located. > > > > Signed-off-by: Florent Revest <revest@google.com> > > --- > > include/uapi/linux/bpf.h | 16 +++++++++++++ > > kernel/trace/bpf_trace.c | 41 > > ++++++++++++++++++++++++++++++++++ > > tools/include/uapi/linux/bpf.h | 16 +++++++++++++ > > 3 files changed, 73 insertions(+) > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index c3458ec1f30a..670998635eac 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h > > @@ -3817,6 +3817,21 @@ union bpf_attr { > > * The **hash_algo** is returned on success, > > * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if > > * invalid arguments are passed. > > + * > > + * long bpf_kallsyms_lookup(u64 address, char *symbol, u32 > > symbol_size, char *module, u32 module_size) > > + * Description > > + * Uses kallsyms to write the name of the symbol at > > *address* > > + * into *symbol* of size *symbol_sz*. This is guaranteed > > to be > > + * zero terminated. > > + * If the symbol is in a module, up to *module_size* bytes > > of > > + * the module name is written in *module*. This is also > > + * guaranteed to be zero-terminated. Note: a module name > > + * is always shorter than 64 bytes. > > + * Return > > + * On success, the strictly positive length of the full > > symbol > > + * name, If this is greater than *symbol_size*, the > > written > > + * symbol is truncated. > > + * On error, a negative value. > > Looks like debug-only helper. > I cannot think of a way to use in production code. 
> What program suppose to do with that string? > Do string compare? BPF side doesn't have a good way to do string > manipulations. > If you really need to print a symbolic name for a given address > I'd rather extend bpf_trace_printk() to support %pS We actually use this helper for auditing, not debugging. We don't want to parse /proc/kallsyms from userspace because we have no guarantee that the module will still be loaded by the time the event reaches userspace (this is also faster in kernelspace).
On Mon, Nov 30, 2020 at 05:23:22PM +0100, Florent Revest wrote: > On Sat, 2020-11-28 at 17:07 -0800, Alexei Starovoitov wrote: > > On Thu, Nov 26, 2020 at 05:57:47PM +0100, Florent Revest wrote: > > > This helper exposes the kallsyms_lookup function to eBPF tracing > > > programs. This can be used to retrieve the name of the symbol at an > > > address. For example, when hooking into nf_register_net_hook, one > > > can > > > audit the name of the registered netfilter hook and potentially > > > also > > > the name of the module in which the symbol is located. > > > > > > Signed-off-by: Florent Revest <revest@google.com> > > > --- > > > include/uapi/linux/bpf.h | 16 +++++++++++++ > > > kernel/trace/bpf_trace.c | 41 > > > ++++++++++++++++++++++++++++++++++ > > > tools/include/uapi/linux/bpf.h | 16 +++++++++++++ > > > 3 files changed, 73 insertions(+) > > > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > > index c3458ec1f30a..670998635eac 100644 > > > --- a/include/uapi/linux/bpf.h > > > +++ b/include/uapi/linux/bpf.h > > > @@ -3817,6 +3817,21 @@ union bpf_attr { > > > * The **hash_algo** is returned on success, > > > * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if > > > * invalid arguments are passed. > > > + * > > > + * long bpf_kallsyms_lookup(u64 address, char *symbol, u32 > > > symbol_size, char *module, u32 module_size) > > > + * Description > > > + * Uses kallsyms to write the name of the symbol at > > > *address* > > > + * into *symbol* of size *symbol_sz*. This is guaranteed > > > to be > > > + * zero terminated. > > > + * If the symbol is in a module, up to *module_size* bytes > > > of > > > + * the module name is written in *module*. This is also > > > + * guaranteed to be zero-terminated. Note: a module name > > > + * is always shorter than 64 bytes. 
> > > + * Return > > > + * On success, the strictly positive length of the full > > > symbol > > > + * name, If this is greater than *symbol_size*, the > > > written > > > + * symbol is truncated. > > > + * On error, a negative value. > > > > Looks like debug-only helper. > > I cannot think of a way to use in production code. > > What program suppose to do with that string? > > Do string compare? BPF side doesn't have a good way to do string > > manipulations. > > If you really need to print a symbolic name for a given address > > I'd rather extend bpf_trace_printk() to support %pS > > We actually use this helper for auditing, not debugging. > We don't want to parse /proc/kallsyms from userspace because we have no > guarantee that the module will still be loaded by the time the event > reaches userspace (this is also faster in kernelspace). so what are you going to do with that string? print it? send to user space via ring buffer? Where are you getting that $pc ?
On Mon, 2020-11-30 at 18:41 -0800, Alexei Starovoitov wrote: > On Mon, Nov 30, 2020 at 05:23:22PM +0100, Florent Revest wrote: > > On Sat, 2020-11-28 at 17:07 -0800, Alexei Starovoitov wrote: > > > Looks like debug-only helper. > > > I cannot think of a way to use in production code. > > > What program suppose to do with that string? > > > Do string compare? BPF side doesn't have a good way to do string > > > manipulations. > > > If you really need to print a symbolic name for a given address > > > I'd rather extend bpf_trace_printk() to support %pS > > > > We actually use this helper for auditing, not debugging. > > We don't want to parse /proc/kallsyms from userspace because we > > have no guarantee that the module will still be loaded by the time > > the event reaches userspace (this is also faster in kernelspace). > > so what are you going to do with that string? > print it? send to user space via ring buffer? We send our auditing events down to the userspace via a ring buffer and then events are aggregated and looked at by security analysts. Having the symbol and module names instead of a hex address makes these events more meaningful. > Where are you getting that $pc ? I give an example in the commit description: we hook into callback registration functions (for example, nf_register_net_hook), get the callback address from the function arguments and log audit information about the registered callback. For example, we want to know the name of the module in which the callback belongs and the symbol name also helps enrich the event.
On Fri, Nov 27, 2020 at 3:20 AM KP Singh <kpsingh@chromium.org> wrote: > > On Fri, Nov 27, 2020 at 8:35 AM Yonghong Song <yhs@fb.com> wrote: > > > > > > > > On 11/26/20 8:57 AM, Florent Revest wrote: > > > This helper exposes the kallsyms_lookup function to eBPF tracing > > > programs. This can be used to retrieve the name of the symbol at an > > > address. For example, when hooking into nf_register_net_hook, one can > > > audit the name of the registered netfilter hook and potentially also > > > the name of the module in which the symbol is located. > > > > > > Signed-off-by: Florent Revest <revest@google.com> > > > --- > > > include/uapi/linux/bpf.h | 16 +++++++++++++ > > > kernel/trace/bpf_trace.c | 41 ++++++++++++++++++++++++++++++++++ > > > tools/include/uapi/linux/bpf.h | 16 +++++++++++++ > > > 3 files changed, 73 insertions(+) > > > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > > index c3458ec1f30a..670998635eac 100644 > > > --- a/include/uapi/linux/bpf.h > > > +++ b/include/uapi/linux/bpf.h > > > @@ -3817,6 +3817,21 @@ union bpf_attr { > > > * The **hash_algo** is returned on success, > > > * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if > > > * invalid arguments are passed. > > > + * > > > + * long bpf_kallsyms_lookup(u64 address, char *symbol, u32 symbol_size, char *module, u32 module_size) > > > + * Description > > > + * Uses kallsyms to write the name of the symbol at *address* > > > + * into *symbol* of size *symbol_sz*. This is guaranteed to be > > > + * zero terminated. > > > + * If the symbol is in a module, up to *module_size* bytes of > > > + * the module name is written in *module*. This is also > > > + * guaranteed to be zero-terminated. Note: a module name > > > + * is always shorter than 64 bytes. > > > + * Return > > > + * On success, the strictly positive length of the full symbol > > > + * name, If this is greater than *symbol_size*, the written > > > + * symbol is truncated. 
> > > + * On error, a negative value. > > > */ > > > #define __BPF_FUNC_MAPPER(FN) \ > > > FN(unspec), \ > > > @@ -3981,6 +3996,7 @@ union bpf_attr { > > > FN(bprm_opts_set), \ > > > FN(ktime_get_coarse_ns), \ > > > FN(ima_inode_hash), \ > > > + FN(kallsyms_lookup), \ > > > /* */ > > > > > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > > > index d255bc9b2bfa..9d86e20c2b13 100644 > > > --- a/kernel/trace/bpf_trace.c > > > +++ b/kernel/trace/bpf_trace.c > > > @@ -17,6 +17,7 @@ > > > #include <linux/error-injection.h> > > > #include <linux/btf_ids.h> > > > #include <linux/bpf_lsm.h> > > > +#include <linux/kallsyms.h> > > > > > > #include <net/bpf_sk_storage.h> > > > > > > @@ -1260,6 +1261,44 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { > > > .arg5_type = ARG_ANYTHING, > > > }; > > > > > > +BPF_CALL_5(bpf_kallsyms_lookup, u64, address, char *, symbol, u32, symbol_size, > > > + char *, module, u32, module_size) > > > +{ > > > + char buffer[KSYM_SYMBOL_LEN]; > > > + unsigned long offset, size; > > > + const char *name; > > > + char *modname; > > > + long ret; > > > + > > > + name = kallsyms_lookup(address, &size, &offset, &modname, buffer); > > > + if (!name) > > > + return -EINVAL; > > > + > > > + ret = strlen(name) + 1; > > > + if (symbol_size) { > > > + strncpy(symbol, name, symbol_size); > > > + symbol[symbol_size - 1] = '\0'; > > > + } > > > + > > > + if (modname && module_size) { > > > + strncpy(module, modname, module_size); > > > + module[module_size - 1] = '\0'; > > > > In this case, module name may be truncated and user did not get any > > indication from return value. In the helper description, it is mentioned > > that module name currently is most 64 bytes. But from UAPI perspective, > > it may be still good to return something to let user know the name > > is truncated. > > > > I do not know what is the best way to do this. 
One suggestion is > > to break it into two helpers, one for symbol name and another > > I think it would be slightly preferable to have one helper though. > maybe something like bpf_get_symbol_info (better names anyone? :)) bpf_ksym_resolve()? > with flags to get the module name or the symbol name depending > on the flag? > > > for module name. What is the use cases people want to get both > > symbol name and module name and is it common? > > The use case would be to disambiguate symbols in the > kernel from the ones from a kernel module. Similar to what > /proc/kallsyms does: > > T cpufreq_gov_powersave_init [cpufreq_powersave] > > > > > > + } > > > + > > > + return ret; > > > +} > > > + > > > +const struct bpf_func_proto bpf_kallsyms_lookup_proto = { > > > + .func = bpf_kallsyms_lookup, > > > + .gpl_only = false, > > > + .ret_type = RET_INTEGER, > > > + .arg1_type = ARG_ANYTHING, > > > + .arg2_type = ARG_PTR_TO_MEM, > > ARG_PTR_TO_UNINIT_MEM? > > > > > + .arg3_type = ARG_CONST_SIZE, > > ARG_CONST_SIZE_OR_ZERO? This is especially true for current format > > which tries to return both symbol name and module name and > > user may just want to do one of them. > > > > > + .arg4_type = ARG_PTR_TO_MEM, > > ARG_PTR_TO_UNINIT_MEM? > > > > > + .arg5_type = ARG_CONST_SIZE, > > ARG_CONST_SIZE_OR_ZERO? > > > > > +}; > > > + > > > const struct bpf_func_proto * > > > bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > > > { > > > @@ -1356,6 +1395,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > > > return &bpf_per_cpu_ptr_proto; > > > case BPF_FUNC_bpf_this_cpu_ptr: > > > return &bpf_this_cpu_ptr_proto; > > > + case BPF_FUNC_kallsyms_lookup: > > > + return &bpf_kallsyms_lookup_proto; > > > default: > > > return NULL; > > > } > > [...]
On Fri, Nov 27, 2020 at 8:09 AM Yonghong Song <yhs@fb.com> wrote: > > > > On 11/27/20 3:20 AM, KP Singh wrote: > > On Fri, Nov 27, 2020 at 8:35 AM Yonghong Song <yhs@fb.com> wrote: > >> > >> > >> > >> On 11/26/20 8:57 AM, Florent Revest wrote: > >>> This helper exposes the kallsyms_lookup function to eBPF tracing > >>> programs. This can be used to retrieve the name of the symbol at an > >>> address. For example, when hooking into nf_register_net_hook, one can > >>> audit the name of the registered netfilter hook and potentially also > >>> the name of the module in which the symbol is located. > >>> > >>> Signed-off-by: Florent Revest <revest@google.com> > >>> --- > >>> include/uapi/linux/bpf.h | 16 +++++++++++++ > >>> kernel/trace/bpf_trace.c | 41 ++++++++++++++++++++++++++++++++++ > >>> tools/include/uapi/linux/bpf.h | 16 +++++++++++++ > >>> 3 files changed, 73 insertions(+) > >>> > >>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > >>> index c3458ec1f30a..670998635eac 100644 > >>> --- a/include/uapi/linux/bpf.h > >>> +++ b/include/uapi/linux/bpf.h > >>> @@ -3817,6 +3817,21 @@ union bpf_attr { > >>> * The **hash_algo** is returned on success, > >>> * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if > >>> * invalid arguments are passed. > >>> + * > >>> + * long bpf_kallsyms_lookup(u64 address, char *symbol, u32 symbol_size, char *module, u32 module_size) > >>> + * Description > >>> + * Uses kallsyms to write the name of the symbol at *address* > >>> + * into *symbol* of size *symbol_sz*. This is guaranteed to be > >>> + * zero terminated. > >>> + * If the symbol is in a module, up to *module_size* bytes of > >>> + * the module name is written in *module*. This is also > >>> + * guaranteed to be zero-terminated. Note: a module name > >>> + * is always shorter than 64 bytes. 
> >>> + * Return > >>> + * On success, the strictly positive length of the full symbol > >>> + * name, If this is greater than *symbol_size*, the written > >>> + * symbol is truncated. > >>> + * On error, a negative value. > >>> */ > >>> #define __BPF_FUNC_MAPPER(FN) \ > >>> FN(unspec), \ > >>> @@ -3981,6 +3996,7 @@ union bpf_attr { > >>> FN(bprm_opts_set), \ > >>> FN(ktime_get_coarse_ns), \ > >>> FN(ima_inode_hash), \ > >>> + FN(kallsyms_lookup), \ > >>> /* */ > >>> > >>> /* integer value in 'imm' field of BPF_CALL instruction selects which helper > >>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > >>> index d255bc9b2bfa..9d86e20c2b13 100644 > >>> --- a/kernel/trace/bpf_trace.c > >>> +++ b/kernel/trace/bpf_trace.c > >>> @@ -17,6 +17,7 @@ > >>> #include <linux/error-injection.h> > >>> #include <linux/btf_ids.h> > >>> #include <linux/bpf_lsm.h> > >>> +#include <linux/kallsyms.h> > >>> > >>> #include <net/bpf_sk_storage.h> > >>> > >>> @@ -1260,6 +1261,44 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { > >>> .arg5_type = ARG_ANYTHING, > >>> }; > >>> > >>> +BPF_CALL_5(bpf_kallsyms_lookup, u64, address, char *, symbol, u32, symbol_size, > >>> + char *, module, u32, module_size) > >>> +{ > >>> + char buffer[KSYM_SYMBOL_LEN]; > >>> + unsigned long offset, size; > >>> + const char *name; > >>> + char *modname; > >>> + long ret; > >>> + > >>> + name = kallsyms_lookup(address, &size, &offset, &modname, buffer); > >>> + if (!name) > >>> + return -EINVAL; > >>> + > >>> + ret = strlen(name) + 1; > >>> + if (symbol_size) { > >>> + strncpy(symbol, name, symbol_size); > >>> + symbol[symbol_size - 1] = '\0'; > >>> + } > >>> + > >>> + if (modname && module_size) { > >>> + strncpy(module, modname, module_size); > >>> + module[module_size - 1] = '\0'; > >> > >> In this case, module name may be truncated and user did not get any > >> indication from return value. In the helper description, it is mentioned > >> that module name currently is most 64 bytes. 
But from UAPI perspective, > >> it may be still good to return something to let user know the name > >> is truncated. > >> > >> I do not know what is the best way to do this. One suggestion is > >> to break it into two helpers, one for symbol name and another > > > > I think it would be slightly preferable to have one helper though. > > maybe something like bpf_get_symbol_info (better names anyone? :)) > > with flags to get the module name or the symbol name depending > > on the flag? > > This works even better. Previously I am thinking if we have two helpers, > we can add flags for each of them for future extension. But we > can certainly have just one helper with flags to indicate > whether this is for module name or for symbol name or something else. > > The buffer can be something like > union bpf_ksymbol_info { > char module_name[]; > char symbol_name[]; > ... > } > and flags will indicate what information user wants. one more thing that might be useful is to resolve to the symbol's "base address". E.g., if we have IP inside the function, this would resolve to the start of the function, sort of "canonical" symbol address. Type of ksym is another "characteristic" which could be returned (as a single char?) I wouldn't define bpf_ksymbol_info, though. Just depending on the flag, specify what kind of memory layout (e.g., for strings - zero-terminated string, for address - 8 byte numbers, etc). That way we can also allow fetching multiple things together, they would just be laid out one after another in memory. E.g.: char buf[256]; int err = bpf_ksym_resolve(<addr>, BPF_KSYM_NAME | BPF_KSYM_MODNAME | BPF_KSYM_BASE_ADDR, buf, sizeof(buf)); if (err == -E2BIG) /* need bigger buffer, but all the data up to truncation point is filled in */ else /* err has exact number of bytes used, including zero terminator(s) */ /* data is laid out as "cpufreq_gov_powersave_init\0cpufreq_powersave\0\x12\x23\x45\x56\x12\x23\x45\x56" */ > > > > >> for module name.
What is the use cases people want to get both > >> symbol name and module name and is it common? > > > > The use case would be to disambiguate symbols in the > > kernel from the ones from a kernel module. Similar to what > > /proc/kallsyms does: > > > > T cpufreq_gov_powersave_init [cpufreq_powersave] > > > >> > >>> + } > >>> + > >>> + return ret; > >>> +} > >>> + > >>> +const struct bpf_func_proto bpf_kallsyms_lookup_proto = { > >>> + .func = bpf_kallsyms_lookup, > >>> + .gpl_only = false, > >>> + .ret_type = RET_INTEGER, > >>> + .arg1_type = ARG_ANYTHING, > >>> + .arg2_type = ARG_PTR_TO_MEM, > >> ARG_PTR_TO_UNINIT_MEM? > >> > >>> + .arg3_type = ARG_CONST_SIZE, > >> ARG_CONST_SIZE_OR_ZERO? This is especially true for current format > >> which tries to return both symbol name and module name and > >> user may just want to do one of them. > >> > >>> + .arg4_type = ARG_PTR_TO_MEM, > >> ARG_PTR_TO_UNINIT_MEM? > >> > >>> + .arg5_type = ARG_CONST_SIZE, > >> ARG_CONST_SIZE_OR_ZERO? > >> > >>> +}; > >>> + > >>> const struct bpf_func_proto * > >>> bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > >>> { > >>> @@ -1356,6 +1395,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > >>> return &bpf_per_cpu_ptr_proto; > >>> case BPF_FUNC_bpf_this_cpu_ptr: > >>> return &bpf_this_cpu_ptr_proto; > >>> + case BPF_FUNC_kallsyms_lookup: > >>> + return &bpf_kallsyms_lookup_proto; > >>> default: > >>> return NULL; > >>> } > >> [...]
On Tue, 2020-12-01 at 16:55 -0800, Andrii Nakryiko wrote: > On Fri, Nov 27, 2020 at 8:09 AM Yonghong Song <yhs@fb.com> wrote: > > > > > > On 11/27/20 3:20 AM, KP Singh wrote: > > > On Fri, Nov 27, 2020 at 8:35 AM Yonghong Song <yhs@fb.com> wrote: > > > > > > > > In this case, module name may be truncated and user did not get > > > > any indication from return value. In the helper description, it > > > > is mentioned that module name currently is most 64 bytes. But > > > > from UAPI perspective, it may be still good to return something > > > > to let user know the name is truncated. > > > > > > > > I do not know what is the best way to do this. One suggestion > > > > is to break it into two helpers, one for symbol name and > > > > another > > > > > > I think it would be slightly preferable to have one helper > > > though. maybe something like bpf_get_symbol_info (better names > > > anyone? :)) with flags to get the module name or the symbol name > > > depending > > > on the flag? > > > > This works even better. Previously I am thinking if we have two > > helpers, > > we can add flags for each of them for future extension. But we > > can certainly have just one helper with flags to indicate > > whether this is for module name or for symbol name or something > > else. > > > > The buffer can be something like > > union bpf_ksymbol_info { > > char module_name[]; > > char symbol_name[]; > > ... > > } > > and flags will indicate what information user wants. > > one more thing that might be useful to resolve to the symbol's "base > address". E.g., if we have IP inside the function, this would resolve > to the start of the function, sort of "canonical" symbol address. > Type of ksym is another "characteristic" which could be returned (as > a single char?) > > I wouldn't define bpf_ksymbol_info, though. Just depending on the > flag, specify what kind of memory layou (e.g., for strings - > zero-terminated string, for address - 8 byte numbers, etc). 
That way > we can also allow fetching multiple things together, they would just > be laid out one after another in memory. > > E.g.: > > char buf[256]; > int err = bpf_ksym_resolve(<addr>, BPF_KSYM_NAME | BPF_KSYM_MODNAME | > BPF_KSYM_BASE_ADDR, buf, sizeof(buf)); > > if (err == -E2BIG) > /* need bigger buffer, but all the data up to truncation point is > filled in */ > else > /* err has exact number of bytes used, including zero terminator(s) > */ > /* data is laid out as > "cpufreq_gov_powersave_init\0cpufreq_powersave\0\x12\x23\x45\x56\x12\ > x23\x45\x56" > */ Great idea! I like that, thanks for the suggestion :)
On Wed, Dec 2, 2020 at 12:32 PM Florent Revest <revest@chromium.org> wrote: > > On Tue, 2020-12-01 at 16:55 -0800, Andrii Nakryiko wrote: > > On Fri, Nov 27, 2020 at 8:09 AM Yonghong Song <yhs@fb.com> wrote: > > > > > > > > > On 11/27/20 3:20 AM, KP Singh wrote: > > > > On Fri, Nov 27, 2020 at 8:35 AM Yonghong Song <yhs@fb.com> wrote: > > > > > > > > > > In this case, module name may be truncated and user did not get > > > > > any indication from return value. In the helper description, it > > > > > is mentioned that module name currently is most 64 bytes. But > > > > > from UAPI perspective, it may be still good to return something > > > > > to let user know the name is truncated. > > > > > > > > > > I do not know what is the best way to do this. One suggestion > > > > > is to break it into two helpers, one for symbol name and > > > > > another > > > > > > > > I think it would be slightly preferable to have one helper > > > > though. maybe something like bpf_get_symbol_info (better names > > > > anyone? :)) with flags to get the module name or the symbol name > > > > depending > > > > on the flag? > > > > > > This works even better. Previously I am thinking if we have two > > > helpers, > > > we can add flags for each of them for future extension. But we > > > can certainly have just one helper with flags to indicate > > > whether this is for module name or for symbol name or something > > > else. > > > > > > The buffer can be something like > > > union bpf_ksymbol_info { > > > char module_name[]; > > > char symbol_name[]; > > > ... > > > } > > > and flags will indicate what information user wants. > > > > one more thing that might be useful to resolve to the symbol's "base > > address". E.g., if we have IP inside the function, this would resolve > > to the start of the function, sort of "canonical" symbol address. > > Type of ksym is another "characteristic" which could be returned (as > > a single char?) > > > > I wouldn't define bpf_ksymbol_info, though. 
Just depending on the > > flag, specify what kind of memory layou (e.g., for strings - > > zero-terminated string, for address - 8 byte numbers, etc). That way > > we can also allow fetching multiple things together, they would just > > be laid out one after another in memory. > > > > E.g.: > > > > char buf[256]; > > int err = bpf_ksym_resolve(<addr>, BPF_KSYM_NAME | BPF_KSYM_MODNAME | > > BPF_KSYM_BASE_ADDR, buf, sizeof(buf)); > > > > if (err == -E2BIG) > > /* need bigger buffer, but all the data up to truncation point is > > filled in */ > > else > > /* err has exact number of bytes used, including zero terminator(s) > > */ > > /* data is laid out as > > "cpufreq_gov_powersave_init\0cpufreq_powersave\0\x12\x23\x45\x56\x12\ > > x23\x45\x56" > > */ > > Great idea! I like that, thanks for the suggestion :) I still think that adopting printk/vsnprintf for this instead of reinventing the wheel is more flexible and easier to maintain long term. Almost the same layout can be done with vsnprintf with exception of \0 char. More meaningful names, etc. See Documentation/core-api/printk-formats.rst If we force fmt to come from readonly map then bpf_trace_printk()-like run-time check of fmt string can be moved into load time check and performance won't suffer.
On Wed, Dec 2, 2020 at 10:18 PM Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote: > I still think that adopting printk/vsnprintf for this instead of > reinventing the wheel > is more flexible and easier to maintain long term. > Almost the same layout can be done with vsnprintf > with exception of \0 char. > More meaningful names, etc. > See Documentation/core-api/printk-formats.rst I agree this would be nice. I finally got a bit of time to experiment with this and I noticed a few things: First of all, because helpers only have 5 arguments, if we use two for the output buffer and its size and two for the format string and its size, we are only left with one argument for a modifier. This is still enough for our usecase (where we'd only use "%ps" for example) but it does not strictly-speaking allow for the same layout that Andrii proposed. > If we force fmt to come from readonly map then bpf_trace_printk()-like > run-time check of fmt string can be moved into load time check > and performance won't suffer. Regarding this bit, I have the impression that this would not be possible, but maybe I'm missing something ? :) The iteration that bpf_trace_printk does over the format string argument is not only used for validation. It is also used to remember what extra operations need to be done based on the modifier types. For example, it remembers whether an arg should be interpreted as 32bits or 64bits. In the case of string printing, it also remembers whether it is a kernel-space or user-space pointer so that bpf_trace_copy_string can be called with the right arg. If we were to run the iteration over the format string in the verifier, how would you recommend that we "remember" the modifier type until the helper gets called ?
On 12/11/20 6:40 AM, Florent Revest wrote: > On Wed, Dec 2, 2020 at 10:18 PM Alexei Starovoitov > <alexei.starovoitov@gmail.com> wrote: >> I still think that adopting printk/vsnprintf for this instead of >> reinventing the wheel >> is more flexible and easier to maintain long term. >> Almost the same layout can be done with vsnprintf >> with exception of \0 char. >> More meaningful names, etc. >> See Documentation/core-api/printk-formats.rst > > I agree this would be nice. I finally got a bit of time to experiment > with this and I noticed a few things: > > First of all, because helpers only have 5 arguments, if we use two for > the output buffer and its size and two for the format string and its > size, we are only left with one argument for a modifier. This is still > enough for our usecase (where we'd only use "%ps" for example) but it > does not strictly-speaking allow for the same layout that Andrii > proposed. See helper bpf_seq_printf. It packs all arguments for format string and puts them into an array. bpf_seq_printf will unpack them as it parsed through the format string. So it should be doable to have more than "%ps" in format string. > >> If we force fmt to come from readonly map then bpf_trace_printk()-like >> run-time check of fmt string can be moved into load time check >> and performance won't suffer. > > Regarding this bit, I have the impression that this would not be > possible, but maybe I'm missing something ? :) > > The iteration that bpf_trace_printk does over the format string > argument is not only used for validation. It is also used to remember > what extra operations need to be done based on the modifier types. For > example, it remembers whether an arg should be interpreted as 32bits or > 64bits. In the case of string printing, it also remembers whether it is > a kernel-space or user-space pointer so that bpf_trace_copy_string can > be called with the right arg. 
If we were to run the iteration over the format > string in the verifier, how would you recommend that we > "remember" the modifier type until the helper gets called ? >
On Mon, Dec 14, 2020 at 7:47 AM Yonghong Song <yhs@fb.com> wrote: > On 12/11/20 6:40 AM, Florent Revest wrote: > > On Wed, Dec 2, 2020 at 10:18 PM Alexei Starovoitov > > <alexei.starovoitov@gmail.com> wrote: > >> I still think that adopting printk/vsnprintf for this instead of > >> reinventing the wheel > >> is more flexible and easier to maintain long term. > >> Almost the same layout can be done with vsnprintf > >> with exception of \0 char. > >> More meaningful names, etc. > >> See Documentation/core-api/printk-formats.rst > > > > I agree this would be nice. I finally got a bit of time to experiment > > with this and I noticed a few things: > > > > First of all, because helpers only have 5 arguments, if we use two for > > the output buffer and its size and two for the format string and its > > size, we are only left with one argument for a modifier. This is still > > enough for our usecase (where we'd only use "%ps" for example) but it > > does not strictly-speaking allow for the same layout that Andrii > > proposed. > > See helper bpf_seq_printf. It packs all arguments for format string and > puts them into an array. bpf_seq_printf will unpack them as it parsed > through the format string. So it should be doable to have more than > "%ps" in format string. This could be a nice trick, thank you for the suggestion Yonghong :) My understanding is that this would also require two extra args (one for the array of arguments and one for the size of this array) so it would still not fit the 5 arguments limit I described in my previous email. eg: this would not be possible: long bpf_snprintf(const char *out, u32 out_size, const char *fmt, u32 fmt_size, const void *data, u32 data_len) Would you then suggest that we also put the format string and its length in the first and second cells of this array and have something along the line of: long bpf_snprintf(const char *out, u32 out_size, const void *args, u32 args_len) ? 
This seems like a fairly opaque signature to me and harder to verify.
On 12/17/20 7:31 AM, Florent Revest wrote: > On Mon, Dec 14, 2020 at 7:47 AM Yonghong Song <yhs@fb.com> wrote: >> On 12/11/20 6:40 AM, Florent Revest wrote: >>> On Wed, Dec 2, 2020 at 10:18 PM Alexei Starovoitov >>> <alexei.starovoitov@gmail.com> wrote: >>>> I still think that adopting printk/vsnprintf for this instead of >>>> reinventing the wheel >>>> is more flexible and easier to maintain long term. >>>> Almost the same layout can be done with vsnprintf >>>> with exception of \0 char. >>>> More meaningful names, etc. >>>> See Documentation/core-api/printk-formats.rst >>> >>> I agree this would be nice. I finally got a bit of time to experiment >>> with this and I noticed a few things: >>> >>> First of all, because helpers only have 5 arguments, if we use two for >>> the output buffer and its size and two for the format string and its >>> size, we are only left with one argument for a modifier. This is still >>> enough for our usecase (where we'd only use "%ps" for example) but it >>> does not strictly-speaking allow for the same layout that Andrii >>> proposed. >> >> See helper bpf_seq_printf. It packs all arguments for format string and >> puts them into an array. bpf_seq_printf will unpack them as it parsed >> through the format string. So it should be doable to have more than >> "%ps" in format string. > > This could be a nice trick, thank you for the suggestion Yonghong :) > > My understanding is that this would also require two extra args (one > for the array of arguments and one for the size of this array) so it > would still not fit the 5 arguments limit I described in my previous > email. > eg: this would not be possible: > long bpf_snprintf(const char *out, u32 out_size, > const char *fmt, u32 fmt_size, > const void *data, u32 data_len) Right. bpf allows only up to 5 parameters. 
> > Would you then suggest that we also put the format string and its > length in the first and second cells of this array and have something > along the line of: > long bpf_snprintf(const char *out, u32 out_size, > const void *args, u32 args_len) ? > This seems like a fairly opaque signature to me and harder to verify. One way is to define an explicit type for args, something like struct bpf_fmt_str_data { char *fmt; u64 fmt_len; u64 data[]; }; The bpf_snprintf signature can be long bpf_snprintf(const char *out, u32 out_size, const struct bpf_fmt_str_data *fmt_data, u32 fmt_data_len); Internally you can have one argument type for "struct bpf_fmt_str_data" like PTR_TO_FMT_DATA as a verifier reg state. If bpf_snprintf is used, when you try to verify PTR_TO_FMT_DATA, you can just verify that fmt_data->fmt and fmt_data->fmt_len satisfy mem constraints. The rest of data can be passed to the helper as is. Yes, still some verifier work. But may be useful for this and future format string related helpers.
On Thu, Dec 17, 2020 at 09:26:09AM -0800, Yonghong Song wrote: > > > On 12/17/20 7:31 AM, Florent Revest wrote: > > On Mon, Dec 14, 2020 at 7:47 AM Yonghong Song <yhs@fb.com> wrote: > > > On 12/11/20 6:40 AM, Florent Revest wrote: > > > > On Wed, Dec 2, 2020 at 10:18 PM Alexei Starovoitov > > > > <alexei.starovoitov@gmail.com> wrote: > > > > > I still think that adopting printk/vsnprintf for this instead of > > > > > reinventing the wheel > > > > > is more flexible and easier to maintain long term. > > > > > Almost the same layout can be done with vsnprintf > > > > > with exception of \0 char. > > > > > More meaningful names, etc. > > > > > See Documentation/core-api/printk-formats.rst > > > > > > > > I agree this would be nice. I finally got a bit of time to experiment > > > > with this and I noticed a few things: > > > > > > > > First of all, because helpers only have 5 arguments, if we use two for > > > > the output buffer and its size and two for the format string and its > > > > size, we are only left with one argument for a modifier. This is still > > > > enough for our usecase (where we'd only use "%ps" for example) but it > > > > does not strictly-speaking allow for the same layout that Andrii > > > > proposed. > > > > > > See helper bpf_seq_printf. It packs all arguments for format string and > > > puts them into an array. bpf_seq_printf will unpack them as it parsed > > > through the format string. So it should be doable to have more than > > > "%ps" in format string. > > > > This could be a nice trick, thank you for the suggestion Yonghong :) > > > > My understanding is that this would also require two extra args (one > > for the array of arguments and one for the size of this array) so it > > would still not fit the 5 arguments limit I described in my previous > > email. > > eg: this would not be possible: > > long bpf_snprintf(const char *out, u32 out_size, > > const char *fmt, u32 fmt_size, > > const void *data, u32 data_len) > > Right. 
bpf allows only up to 5 parameters. > > > > Would you then suggest that we also put the format string and its > > length in the first and second cells of this array and have something > > along the line of: > > long bpf_snprintf(const char *out, u32 out_size, > > const void *args, u32 args_len) ? > > This seems like a fairly opaque signature to me and harder to verify. > > One way is to define an explicit type for args, something like > struct bpf_fmt_str_data { > char *fmt; > u64 fmt_len; > u64 data[]; > }; that feels a bit convoluted. The reason I feel uneasy with the helper as was originally proposed and with Andrii's proposal is all the extra strlen and strcpy that needs to be done. In the helper we have to call kallsyms_lookup() which is an ok interface for what it was designed to do, but it's awkward to use to construct a new string ("%s [%s]", sym, modname) or to send two strings into a ring buffer. Andrii's zero separator idea will simplify bpf prog, but user space would need to do strlen anyway if it needs to pretty print. If we take pain on converting addr to sym+modname let's figure out how to make it easy for the bpf prog to do and easy for user space to consume. That's why I proposed snprintf. As for the 6 arg issue: long bpf_snprintf(const char *out, u32 out_size, const char *fmt, u32 fmt_size, const void *data, u32 data_len); Yeah. It won't work as-is, but fmt_size is unnecessary nowadays. The verifier understands read-only data. Hence the helper can be: long bpf_snprintf(const char *out, u32 out_size, const char *fmt, const void *data, u32 data_len); The 3rd arg cannot be ARG_PTR_TO_MEM. Instead we can introduce ARG_PTR_TO_CONST_STR in the verifier. See check_mem_access() where it's doing bpf_map_direct_read(). That 'fmt' string will be accessed through the same bpf_map_direct_read(). The verifier would need to check that it's a NUL-terminated valid string. It should probably do % specifier checks at the same time.
In the end bpf_snprintf() will have 5 args and when wrapped with a BPF_SNPRINTF() macro it will accept an arbitrary number of arguments to print. It also will be generally useful to do all other kinds of pretty printing.
On 12/17/20 7:20 PM, Alexei Starovoitov wrote: > On Thu, Dec 17, 2020 at 09:26:09AM -0800, Yonghong Song wrote: >> >> >> On 12/17/20 7:31 AM, Florent Revest wrote: >>> On Mon, Dec 14, 2020 at 7:47 AM Yonghong Song <yhs@fb.com> wrote: >>>> On 12/11/20 6:40 AM, Florent Revest wrote: >>>>> On Wed, Dec 2, 2020 at 10:18 PM Alexei Starovoitov >>>>> <alexei.starovoitov@gmail.com> wrote: >>>>>> I still think that adopting printk/vsnprintf for this instead of >>>>>> reinventing the wheel >>>>>> is more flexible and easier to maintain long term. >>>>>> Almost the same layout can be done with vsnprintf >>>>>> with exception of \0 char. >>>>>> More meaningful names, etc. >>>>>> See Documentation/core-api/printk-formats.rst >>>>> >>>>> I agree this would be nice. I finally got a bit of time to experiment >>>>> with this and I noticed a few things: >>>>> >>>>> First of all, because helpers only have 5 arguments, if we use two for >>>>> the output buffer and its size and two for the format string and its >>>>> size, we are only left with one argument for a modifier. This is still >>>>> enough for our usecase (where we'd only use "%ps" for example) but it >>>>> does not strictly-speaking allow for the same layout that Andrii >>>>> proposed. >>>> >>>> See helper bpf_seq_printf. It packs all arguments for format string and >>>> puts them into an array. bpf_seq_printf will unpack them as it parsed >>>> through the format string. So it should be doable to have more than >>>> "%ps" in format string. >>> >>> This could be a nice trick, thank you for the suggestion Yonghong :) >>> >>> My understanding is that this would also require two extra args (one >>> for the array of arguments and one for the size of this array) so it >>> would still not fit the 5 arguments limit I described in my previous >>> email. >>> eg: this would not be possible: >>> long bpf_snprintf(const char *out, u32 out_size, >>> const char *fmt, u32 fmt_size, >>> const void *data, u32 data_len) >> >> Right. 
bpf allows only up to 5 parameters. >>> >>> Would you then suggest that we also put the format string and its >>> length in the first and second cells of this array and have something >>> along the line of: >>> long bpf_snprintf(const char *out, u32 out_size, >>> const void *args, u32 args_len) ? >>> This seems like a fairly opaque signature to me and harder to verify. >> >> One way is to define an explicit type for args, something like >> struct bpf_fmt_str_data { >> char *fmt; >> u64 fmt_len; >> u64 data[]; >> }; > > that feels a bit convoluted. > > The reason I feel unease with the helper as was originally proposed > and with Andrii's proposal is all the extra strlen and strcpy that > needs to be done. In the helper we have to call kallsyms_lookup() > which is ok interface for what it was desinged to do, > but it's awkward to use to construct new string ("%s [%s]", sym, modname) > or to send two strings into a ring buffer. > Andrii's zero separator idea will simplify bpf prog, but user space > would need to do strlen anyway if it needs to pretty print. > If we take pain on converting addr to sym+modname let's figure out > how to make it easy for the bpf prog to do and easy for user space to consume. > That's why I proposed snprintf. > > As far as 6 arg issue: > long bpf_snprintf(const char *out, u32 out_size, > const char *fmt, u32 fmt_size, > const void *data, u32 data_len); > Yeah. It won't work as-is, but fmt_size is unnecessary nowadays. > The verifier understands read-only data. > Hence the helper can be: > long bpf_snprintf(const char *out, u32 out_size, > const char *fmt, > const void *data, u32 data_len); > The 3rd arg cannot be ARG_PTR_TO_MEM. > Instead we can introduce ARG_PTR_TO_CONST_STR in the verifier. This should work except if fmt string is on the stack. Maybe this is an okay tradeoff. > See check_mem_access() where it's doing bpf_map_direct_read(). > That 'fmt' string will be accessed through the same bpf_map_direct_read(). 
> The verifier would need to check that it's NUL-terminated valid string. > It should probably do % specifier checks at the same time. > At the end bpf_snprintf() will have 5 args and when wrapped with > BPF_SNPRINTF() macro it will accept arbitrary number of arguments to print. > It also will be generally useful to do all other kinds of pretty printing. >
On Thu, Dec 17, 2020 at 7:20 PM Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote: > > On Thu, Dec 17, 2020 at 09:26:09AM -0800, Yonghong Song wrote: > > > > > > On 12/17/20 7:31 AM, Florent Revest wrote: > > > On Mon, Dec 14, 2020 at 7:47 AM Yonghong Song <yhs@fb.com> wrote: > > > > On 12/11/20 6:40 AM, Florent Revest wrote: > > > > > On Wed, Dec 2, 2020 at 10:18 PM Alexei Starovoitov > > > > > <alexei.starovoitov@gmail.com> wrote: > > > > > > I still think that adopting printk/vsnprintf for this instead of > > > > > > reinventing the wheel > > > > > > is more flexible and easier to maintain long term. > > > > > > Almost the same layout can be done with vsnprintf > > > > > > with exception of \0 char. > > > > > > More meaningful names, etc. > > > > > > See Documentation/core-api/printk-formats.rst > > > > > > > > > > I agree this would be nice. I finally got a bit of time to experiment > > > > > with this and I noticed a few things: > > > > > > > > > > First of all, because helpers only have 5 arguments, if we use two for > > > > > the output buffer and its size and two for the format string and its > > > > > size, we are only left with one argument for a modifier. This is still > > > > > enough for our usecase (where we'd only use "%ps" for example) but it > > > > > does not strictly-speaking allow for the same layout that Andrii > > > > > proposed. > > > > > > > > See helper bpf_seq_printf. It packs all arguments for format string and > > > > puts them into an array. bpf_seq_printf will unpack them as it parsed > > > > through the format string. So it should be doable to have more than > > > > "%ps" in format string. > > > > > > This could be a nice trick, thank you for the suggestion Yonghong :) > > > > > > My understanding is that this would also require two extra args (one > > > for the array of arguments and one for the size of this array) so it > > > would still not fit the 5 arguments limit I described in my previous > > > email. 
> > > eg: this would not be possible: > > > long bpf_snprintf(const char *out, u32 out_size, > > > const char *fmt, u32 fmt_size, > > > const void *data, u32 data_len) > > > > Right. bpf allows only up to 5 parameters. > > > > > > Would you then suggest that we also put the format string and its > > > length in the first and second cells of this array and have something > > > along the line of: > > > long bpf_snprintf(const char *out, u32 out_size, > > > const void *args, u32 args_len) ? > > > This seems like a fairly opaque signature to me and harder to verify. > > > > One way is to define an explicit type for args, something like > > struct bpf_fmt_str_data { > > char *fmt; > > u64 fmt_len; > > u64 data[]; > > }; > > that feels a bit convoluted. > > The reason I feel unease with the helper as was originally proposed > and with Andrii's proposal is all the extra strlen and strcpy that > needs to be done. In the helper we have to call kallsyms_lookup() > which is ok interface for what it was desinged to do, > but it's awkward to use to construct new string ("%s [%s]", sym, modname) > or to send two strings into a ring buffer. > Andrii's zero separator idea will simplify bpf prog, but user space > would need to do strlen anyway if it needs to pretty print. > If we take pain on converting addr to sym+modname let's figure out > how to make it easy for the bpf prog to do and easy for user space to consume. > That's why I proposed snprintf. I have nothing against snprintf support for symbols. But bpf_ksym_resolve() solves only a partially overlapping problem, so deserves to be added in addition to snprintf support. With snprintf, it will be hard to avoid two lookups of the same symbol to print "%s [%s]" form, so there is a performance loss, which is probably bigger than a simple search for a zero-byte. But bpf_ksym_resolve() can be used flexibly. 
You can either do two separate bpf_ksym_resolve() calls to get symbol name (and its length) and symbol's module (and its length), if you need to process it programmatically in BPF program. Or you can bundle it together and let user-space process it. User-space will need to copy data anyways because it can't stay in perfbuf/ringbuf for long. So scanning for zero delimiters will be negligible, it will just bring data into cache. All I'm saying is that ksym_resolve() gives flexibility which snprintf can't provide. Additionally, with ksym_resolve() being able to return base address, it's now possible to do a bunch of new stuff, from in-BPF symbolization to additional things like correlating memory accesses or function calls, etc. We just need to make sure that fixed-length base addr is put first, before symbol name and symbol module (if they are requested), so that a BPF program just knows that it's at offset 0. We can discuss those details separately (it's just a matter of ordering bits), my point is that ksym_resolve() is more powerful than snprintf(): the latter can be used pretty much only for pretty-printing. > > As far as 6 arg issue: > long bpf_snprintf(const char *out, u32 out_size, > const char *fmt, u32 fmt_size, > const void *data, u32 data_len); > Yeah. It won't work as-is, but fmt_size is unnecessary nowadays. > The verifier understands read-only data. > Hence the helper can be: > long bpf_snprintf(const char *out, u32 out_size, With the power of BTF, we can also put these two correlated values into a single struct and pass a pointer to it. It will take only one parameter for one memory region. Alternative is the "fat pointer" approach that Go and Rust use, but it's less flexible overall. > const char *fmt, > const void *data, u32 data_len); > The 3rd arg cannot be ARG_PTR_TO_MEM. > Instead we can introduce ARG_PTR_TO_CONST_STR in the verifier. > See check_mem_access() where it's doing bpf_map_direct_read(). 
> That 'fmt' string will be accessed through the same bpf_map_direct_read(). > The verifier would need to check that it's NUL-terminated valid string. > It should probably do % specifier checks at the same time. > At the end bpf_snprintf() will have 5 args and when wrapped with > BPF_SNPRINTF() macro it will accept arbitrary number of arguments to print. > It also will be generally useful to do all other kinds of pretty printing.
On Fri, Dec 18, 2020 at 10:53:57AM -0800, Andrii Nakryiko wrote: > On Thu, Dec 17, 2020 at 7:20 PM Alexei Starovoitov > <alexei.starovoitov@gmail.com> wrote: > > > > On Thu, Dec 17, 2020 at 09:26:09AM -0800, Yonghong Song wrote: > > > > > > > > > On 12/17/20 7:31 AM, Florent Revest wrote: > > > > On Mon, Dec 14, 2020 at 7:47 AM Yonghong Song <yhs@fb.com> wrote: > > > > > On 12/11/20 6:40 AM, Florent Revest wrote: > > > > > > On Wed, Dec 2, 2020 at 10:18 PM Alexei Starovoitov > > > > > > <alexei.starovoitov@gmail.com> wrote: > > > > > > > I still think that adopting printk/vsnprintf for this instead of > > > > > > > reinventing the wheel > > > > > > > is more flexible and easier to maintain long term. > > > > > > > Almost the same layout can be done with vsnprintf > > > > > > > with exception of \0 char. > > > > > > > More meaningful names, etc. > > > > > > > See Documentation/core-api/printk-formats.rst > > > > > > > > > > > > I agree this would be nice. I finally got a bit of time to experiment > > > > > > with this and I noticed a few things: > > > > > > > > > > > > First of all, because helpers only have 5 arguments, if we use two for > > > > > > the output buffer and its size and two for the format string and its > > > > > > size, we are only left with one argument for a modifier. This is still > > > > > > enough for our usecase (where we'd only use "%ps" for example) but it > > > > > > does not strictly-speaking allow for the same layout that Andrii > > > > > > proposed. > > > > > > > > > > See helper bpf_seq_printf. It packs all arguments for format string and > > > > > puts them into an array. bpf_seq_printf will unpack them as it parsed > > > > > through the format string. So it should be doable to have more than > > > > > "%ps" in format string. 
> > > > > > > > This could be a nice trick, thank you for the suggestion Yonghong :) > > > > > > > > My understanding is that this would also require two extra args (one > > > > for the array of arguments and one for the size of this array) so it > > > > would still not fit the 5 arguments limit I described in my previous > > > > email. > > > > eg: this would not be possible: > > > > long bpf_snprintf(const char *out, u32 out_size, > > > > const char *fmt, u32 fmt_size, > > > > const void *data, u32 data_len) > > > > > > Right. bpf allows only up to 5 parameters. > > > > > > > > Would you then suggest that we also put the format string and its > > > > length in the first and second cells of this array and have something > > > > along the line of: > > > > long bpf_snprintf(const char *out, u32 out_size, > > > > const void *args, u32 args_len) ? > > > > This seems like a fairly opaque signature to me and harder to verify. > > > > > > One way is to define an explicit type for args, something like > > > struct bpf_fmt_str_data { > > > char *fmt; > > > u64 fmt_len; > > > u64 data[]; > > > }; > > > > that feels a bit convoluted. > > > > The reason I feel unease with the helper as was originally proposed > > and with Andrii's proposal is all the extra strlen and strcpy that > > needs to be done. In the helper we have to call kallsyms_lookup() > > which is ok interface for what it was desinged to do, > > but it's awkward to use to construct new string ("%s [%s]", sym, modname) > > or to send two strings into a ring buffer. > > Andrii's zero separator idea will simplify bpf prog, but user space > > would need to do strlen anyway if it needs to pretty print. > > If we take pain on converting addr to sym+modname let's figure out > > how to make it easy for the bpf prog to do and easy for user space to consume. > > That's why I proposed snprintf. > > I have nothing against snprintf support for symbols. 
But > bpf_ksym_resolve() solves only a partially overlapping problem, so > deserves to be added in addition to snprintf support. With snprintf, > it will be hard to avoid two lookups of the same symbol to print "%s > [%s]" form, so there is a performance loss, which is probably bigger > than a simple search for a zero-byte. I suspect we're not on the same page in terms of what printf can do. See Documentation/core-api/printk-formats.rst and lib/vsprintf.c:symbol_string() It's exactly one lookup in sprintf implementation. bpf_snprintf(buf, "%ps", addr) would be equivalent to { ksym_resolve(sym, modname, addr, SYM | MOD); printf("%s [%s]", sym, modname); } > But bpf_ksym_resolve() can be > used flexibly. You can either do two separate bpf_ksym_resolve() calls > to get symbol name (and its length) and symbol's module (and its > length), if you need to process it programmatically in BPF program. Or > you can bundle it together and let user-space process it. User-space > will need to copy data anyways because it can't stay in > perfbuf/ringbuf for long. So scanning for zero delimiters will be > negligible, it will just bring data into cache. All I'm saying is that > ksym_resolve() gives flexibility which snprintf can't provide. Well, with snprintf there will be no way to print mod symbol without modname, but imo it's a good thing. What is the use case for getting mod symbol without modname? > Additionally, with ksym_resolve() being able to return base address, > it's now possible to do a bunch of new stuff, from in-BPF > symbolization to additional things like correlating memory accesses or > function calls, etc. Getting adjusted base address could be useful some day, but why now? What for? > bits), my point is that ksym_resolve() is more powerful than > snprintf(): the latter can be used pretty much only for > pretty-printing. Potentially yes. I think the stated goal was pretty printing. 
> > > > > As far as 6 arg issue: > > long bpf_snprintf(const char *out, u32 out_size, > > const char *fmt, u32 fmt_size, > > const void *data, u32 data_len); > > Yeah. It won't work as-is, but fmt_size is unnecessary nowadays. > > The verifier understands read-only data. > > Hence the helper can be: > > long bpf_snprintf(const char *out, u32 out_size, > > With the power of BTF, we can also put these two correlated values > into a single struct and pass a pointer to it. It will take only one > parameter for one memory region. Alternative is the "fat pointer" > approach that Go and Rust use, but it's less flexible overall. I think it will be less flexible when output size is fixed by the type info. With explicit size the bpf_snprintf() can print directly into ringbuffer. Multiple bpf_snprintf() will be able to fill it one by one reducing space available at every step. bpf_snprintf() would need to return the number of bytes, of course. Just like probe_read_str.
On Fri, Dec 18, 2020 at 12:36 PM Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote: > > On Fri, Dec 18, 2020 at 10:53:57AM -0800, Andrii Nakryiko wrote: > > On Thu, Dec 17, 2020 at 7:20 PM Alexei Starovoitov > > <alexei.starovoitov@gmail.com> wrote: > > > > > > On Thu, Dec 17, 2020 at 09:26:09AM -0800, Yonghong Song wrote: > > > > > > > > > > > > On 12/17/20 7:31 AM, Florent Revest wrote: > > > > > On Mon, Dec 14, 2020 at 7:47 AM Yonghong Song <yhs@fb.com> wrote: > > > > > > On 12/11/20 6:40 AM, Florent Revest wrote: > > > > > > > On Wed, Dec 2, 2020 at 10:18 PM Alexei Starovoitov > > > > > > > <alexei.starovoitov@gmail.com> wrote: > > > > > > > > I still think that adopting printk/vsnprintf for this instead of > > > > > > > > reinventing the wheel > > > > > > > > is more flexible and easier to maintain long term. > > > > > > > > Almost the same layout can be done with vsnprintf > > > > > > > > with exception of \0 char. > > > > > > > > More meaningful names, etc. > > > > > > > > See Documentation/core-api/printk-formats.rst > > > > > > > > > > > > > > I agree this would be nice. I finally got a bit of time to experiment > > > > > > > with this and I noticed a few things: > > > > > > > > > > > > > > First of all, because helpers only have 5 arguments, if we use two for > > > > > > > the output buffer and its size and two for the format string and its > > > > > > > size, we are only left with one argument for a modifier. This is still > > > > > > > enough for our usecase (where we'd only use "%ps" for example) but it > > > > > > > does not strictly-speaking allow for the same layout that Andrii > > > > > > > proposed. > > > > > > > > > > > > See helper bpf_seq_printf. It packs all arguments for format string and > > > > > > puts them into an array. bpf_seq_printf will unpack them as it parsed > > > > > > through the format string. So it should be doable to have more than > > > > > > "%ps" in format string. 
> > > > > > > > > > This could be a nice trick, thank you for the suggestion Yonghong :) > > > > > > > > > > My understanding is that this would also require two extra args (one > > > > > for the array of arguments and one for the size of this array) so it > > > > > would still not fit the 5 arguments limit I described in my previous > > > > > email. > > > > > eg: this would not be possible: > > > > > long bpf_snprintf(const char *out, u32 out_size, > > > > > const char *fmt, u32 fmt_size, > > > > > const void *data, u32 data_len) > > > > > > > > Right. bpf allows only up to 5 parameters. > > > > > > > > > > Would you then suggest that we also put the format string and its > > > > > length in the first and second cells of this array and have something > > > > > along the line of: > > > > > long bpf_snprintf(const char *out, u32 out_size, > > > > > const void *args, u32 args_len) ? > > > > > This seems like a fairly opaque signature to me and harder to verify. > > > > > > > > One way is to define an explicit type for args, something like > > > > struct bpf_fmt_str_data { > > > > char *fmt; > > > > u64 fmt_len; > > > > u64 data[]; > > > > }; > > > > > > that feels a bit convoluted. > > > > > > The reason I feel unease with the helper as was originally proposed > > > and with Andrii's proposal is all the extra strlen and strcpy that > > > needs to be done. In the helper we have to call kallsyms_lookup() > > > which is ok interface for what it was desinged to do, > > > but it's awkward to use to construct new string ("%s [%s]", sym, modname) > > > or to send two strings into a ring buffer. > > > Andrii's zero separator idea will simplify bpf prog, but user space > > > would need to do strlen anyway if it needs to pretty print. > > > If we take pain on converting addr to sym+modname let's figure out > > > how to make it easy for the bpf prog to do and easy for user space to consume. > > > That's why I proposed snprintf. 
> > > > I have nothing against snprintf support for symbols. But > > bpf_ksym_resolve() solves only a partially overlapping problem, so > > deserves to be added in addition to snprintf support. With snprintf, > > it will be hard to avoid two lookups of the same symbol to print "%s > > [%s]" form, so there is a performance loss, which is probably bigger > > than a simple search for a zero-byte. > > I suspect we're not on the same page in terms of what printf can do. > See Documentation/core-api/printk-formats.rst and lib/vsprintf.c:symbol_string() > It's exactly one lookup in sprintf implementation. > bpf_snprintf(buf, "%ps", addr) would be equivalent to > { > ksym_resolve(sym, modname, addr, SYM | MOD); > printf("%s [%s]", sym, modname); > } Ah, I missed that we'll have a single specifier for "%s [%s]" format. My assumption was that we have one for symbol name only and another for symbol module. Yeah, then it's fine from the performance perspective. > > > But bpf_ksym_resolve() can be > > used flexibly. You can either do two separate bpf_ksym_resolve() calls > > to get symbol name (and its length) and symbol's module (and its > > length), if you need to process it programmatically in BPF program. Or > > you can bundle it together and let user-space process it. User-space > > will need to copy data anyways because it can't stay in > > perfbuf/ringbuf for long. So scanning for zero delimiters will be > > negligible, it will just bring data into cache. All I'm saying is that > > ksym_resolve() gives flexibility which snprintf can't provide. > > Well, with snprintf there will be no way to print mod symbol > without modname, but imo it's a good thing. > What is the use case for getting mod symbol without modname? For easier post-processing on the user side. Instead of parsing "vmlinux_symbol" or "module_symbol [module_name]" (two non-uniform variants already), user-space would just get two separate strings. 
I just like APIs that don't assume how I am going to use them :), so "symbol [module]" format is a bit more inconvenient than decomposed pieces. > > > Additionally, with ksym_resolve() being able to return base address, > > it's now possible to do a bunch of new stuff, from in-BPF > > symbolization to additional things like correlating memory accesses or > > function calls, etc. > > Getting adjusted base address could be useful some day, but why now? What for? I proposed that only if we do bpf_ksym_resolve(). No need to support that in snprintf case, of course. > > > bits), my point is that ksym_resolve() is more powerful than > > snprintf(): the latter can be used pretty much only for > > pretty-printing. > > Potentially yes. I think the stated goal was pretty printing. That's fine if we do only snprintf, yes. But if a separate helper, then we should think more broadly. > > > > > > > > > As far as 6 arg issue: > > > long bpf_snprintf(const char *out, u32 out_size, > > > const char *fmt, u32 fmt_size, > > > const void *data, u32 data_len); > > > Yeah. It won't work as-is, but fmt_size is unnecessary nowadays. > > > The verifier understands read-only data. > > > Hence the helper can be: > > > long bpf_snprintf(const char *out, u32 out_size, > > > > With the power of BTF, we can also put these two correlated values > > into a single struct and pass a pointer to it. It will take only one > > parameter for one memory region. Alternative is the "fat pointer" > > approach that Go and Rust use, but it's less flexible overall. > > I think it will be less flexible when output size is fixed by the type info. > With explicit size the bpf_snprintf() can print directly into ringbuffer. > Multiple bpf_snprintf() will be able to fill it one by one reducing > space available at every step. > bpf_snprintf() would need to return the number of bytes, of course. > Just like probe_read_str. Ok, I should have probably demonstrated with an example. 
I don't propose to specify the size through BTF itself. I was thinking about: struct bpf_mem_ptr { void *data; size_t size; }; struct bpf_mem_ptr p = { ptr, 123 }; bpf_whatever_helper(&p, ...); bpf_whatever_helper() will specify that the first argument has to be PTR_TO_BTF_ID where btf_id corresponds to struct bpf_mem_ptr. Hope this helps.
FYI, there is a reason why kallsyms_lookup is not exported any more. I don't think adding that back through a backdoor is a good idea.
On Tue, Dec 22, 2020 at 3:18 PM Christoph Hellwig <hch@infradead.org> wrote: > > FYI, there is a reason why kallsyms_lookup is not exported any more. > I don't think adding that back through a backdoor is a good idea. Did you maybe mean kallsyms_lookup_name (the one that looks an address up based on a symbol name)? It used to be exported but indeed isn't anymore. However, this is not what we're trying to do. As far as I can tell, kallsyms_lookup (the one that looks a symbol name up based on an address) has never been exported but its close cousins sprint_symbol and sprint_symbol_no_offset (which only call kallsyms_lookup and pretty-print the result) are still exported; they are also used by vsprintf. Is this an issue?
On Fri, Dec 18, 2020 at 9:47 PM Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: > > On Fri, Dec 18, 2020 at 12:36 PM Alexei Starovoitov > <alexei.starovoitov@gmail.com> wrote: > > > > On Fri, Dec 18, 2020 at 10:53:57AM -0800, Andrii Nakryiko wrote: > > > On Thu, Dec 17, 2020 at 7:20 PM Alexei Starovoitov > > > <alexei.starovoitov@gmail.com> wrote: > > > > > > > > On Thu, Dec 17, 2020 at 09:26:09AM -0800, Yonghong Song wrote: > > > > > > > > > > > > > > > On 12/17/20 7:31 AM, Florent Revest wrote: > > > > > > On Mon, Dec 14, 2020 at 7:47 AM Yonghong Song <yhs@fb.com> wrote: > > > > > > > On 12/11/20 6:40 AM, Florent Revest wrote: > > > > > > > > On Wed, Dec 2, 2020 at 10:18 PM Alexei Starovoitov > > > > > > > > <alexei.starovoitov@gmail.com> wrote: > > > > > > > > > I still think that adopting printk/vsnprintf for this instead of > > > > > > > > > reinventing the wheel > > > > > > > > > is more flexible and easier to maintain long term. > > > > > > > > > Almost the same layout can be done with vsnprintf > > > > > > > > > with exception of \0 char. > > > > > > > > > More meaningful names, etc. > > > > > > > > > See Documentation/core-api/printk-formats.rst > > > > > > > > > > > > > > > > I agree this would be nice. I finally got a bit of time to experiment > > > > > > > > with this and I noticed a few things: > > > > > > > > > > > > > > > > First of all, because helpers only have 5 arguments, if we use two for > > > > > > > > the output buffer and its size and two for the format string and its > > > > > > > > size, we are only left with one argument for a modifier. This is still > > > > > > > > enough for our usecase (where we'd only use "%ps" for example) but it > > > > > > > > does not strictly-speaking allow for the same layout that Andrii > > > > > > > > proposed. > > > > > > > > > > > > > > See helper bpf_seq_printf. It packs all arguments for format string and > > > > > > > puts them into an array. 
bpf_seq_printf will unpack them as it parsed > > > > > > > through the format string. So it should be doable to have more than > > > > > > > "%ps" in format string. > > > > > > > > > > > > This could be a nice trick, thank you for the suggestion Yonghong :) > > > > > > > > > > > > My understanding is that this would also require two extra args (one > > > > > > for the array of arguments and one for the size of this array) so it > > > > > > would still not fit the 5 arguments limit I described in my previous > > > > > > email. > > > > > > eg: this would not be possible: > > > > > > long bpf_snprintf(const char *out, u32 out_size, > > > > > > const char *fmt, u32 fmt_size, > > > > > > const void *data, u32 data_len) > > > > > > > > > > Right. bpf allows only up to 5 parameters. > > > > > > > > > > > > Would you then suggest that we also put the format string and its > > > > > > length in the first and second cells of this array and have something > > > > > > along the line of: > > > > > > long bpf_snprintf(const char *out, u32 out_size, > > > > > > const void *args, u32 args_len) ? > > > > > > This seems like a fairly opaque signature to me and harder to verify. > > > > > > > > > > One way is to define an explicit type for args, something like > > > > > struct bpf_fmt_str_data { > > > > > char *fmt; > > > > > u64 fmt_len; > > > > > u64 data[]; > > > > > }; > > > > > > > > that feels a bit convoluted. > > > > > > > > The reason I feel unease with the helper as was originally proposed > > > > and with Andrii's proposal is all the extra strlen and strcpy that > > > > needs to be done. In the helper we have to call kallsyms_lookup() Note that vsprintf itself calls __sprint_symbol which does the same thing as my helper (a kallsyms_lookup followed by a strcpy and a strlen) > > > > which is ok interface for what it was desinged to do, > > > > but it's awkward to use to construct new string ("%s [%s]", sym, modname) > > > > or to send two strings into a ring buffer. 
> > > > Andrii's zero separator idea will simplify bpf prog, but user space > > > > would need to do strlen anyway if it needs to pretty print. > > > > If we take pain on converting addr to sym+modname let's figure out > > > > how to make it easy for the bpf prog to do and easy for user space to consume. > > > > That's why I proposed snprintf. Both solutions are fine with us but I feel that the snprintf would be generally more helpful for BPF. > > > > > > I have nothing against snprintf support for symbols. But > > > bpf_ksym_resolve() solves only a partially overlapping problem, so > > > deserves to be added in addition to snprintf support. With snprintf, > > > it will be hard to avoid two lookups of the same symbol to print "%s > > > [%s]" form, so there is a performance loss, which is probably bigger > > > than a simple search for a zero-byte. > > > > I suspect we're not on the same page in terms of what printf can do. > > See Documentation/core-api/printk-formats.rst and lib/vsprintf.c:symbol_string() > > It's exactly one lookup in sprintf implementation. > > bpf_snprintf(buf, "%ps", addr) would be equivalent to > > { > > ksym_resolve(sym, modname, addr, SYM | MOD); > > printf("%s [%s]", sym, modname); > > } > > Ah, I missed that we'll have a single specifier for "%s [%s]" format. > My assumption was that we have one for symbol name only and another > for symbol module. Yeah, then it's fine from the performance > perspective. > > > > > > But bpf_ksym_resolve() can be > > > used flexibly. You can either do two separate bpf_ksym_resolve() calls > > > to get symbol name (and its length) and symbol's module (and its > > > length), if you need to process it programmatically in BPF program. Or > > > you can bundle it together and let user-space process it. User-space > > > will need to copy data anyways because it can't stay in > > > perfbuf/ringbuf for long. So scanning for zero delimiters will be > > > negligible, it will just bring data into cache. 
All I'm saying is that > > > ksym_resolve() gives flexibility which snprintf can't provide. > > > > Well, with snprintf there will be no way to print mod symbol > > without modname, but imo it's a good thing. > > What is the use case for getting mod symbol without modname? > > For easier post-processing on the user side. Instead of parsing > "vmlinux_symbol" or "module_symbol [module_name]" (two non-uniform > variants already), user-space would just get two separate strings. I > just like APIs that don't assume how I am going to use them :), so > "symbol [module]" format is a bit more inconvenient than decomposed > pieces. > > > > > Additionally, with ksym_resolve() being able to return base address, > > > it's now possible to do a bunch of new stuff, from in-BPF > > > symbolization to additional things like correlating memory accesses or > > > function calls, etc. > > > > Getting adjusted base address could be useful some day, but why now? What for? > > I proposed that only if we do bpf_ksym_resolve(). No need to support > that in snprintf case, of course. > > > > > > bits), my point is that ksym_resolve() is more powerful than > > > snprintf(): the latter can be used pretty much only for > > > pretty-printing. > > > > Potentially yes. I think the stated goal was pretty printing. > > That's fine if we do only snprintf, yes. But if a separate helper, > then we should think more broadly. Let's start with only snprintf then, this solves our usecase and if a different need arises in the future (eg: offset) we could design a new helper around that need. > > > > > > > > > > > > > As far as 6 arg issue: > > > > long bpf_snprintf(const char *out, u32 out_size, > > > > const char *fmt, u32 fmt_size, > > > > const void *data, u32 data_len); > > > > Yeah. It won't work as-is, but fmt_size is unnecessary nowadays. > > > > The verifier understands read-only data. 
> > > > Hence the helper can be: > > > > long bpf_snprintf(const char *out, u32 out_size, > > > > > > With the power of BTF, we can also put these two correlated values > > > into a single struct and pass a pointer to it. It will take only one > > > parameter for one memory region. Alternative is the "fat pointer" > > > approach that Go and Rust use, but it's less flexible overall. > > > > I think it will be less flexible when output size is fixed by the type info. > > With explicit size the bpf_snprintf() can print directly into ringbuffer. > > Multiple bpf_snprintf() will be able to fill it one by one reducing > > space available at every step. > > bpf_snprintf() would need to return the number of bytes, of course. > > Just like probe_read_str. > > Ok, I should have probably demonstrated with an example. I don't > propose to specify the size through BTF itself. I was thinking about: > > struct bpf_mem_ptr { > void *data; > size_t size; > }; > > > struct bpf_mem_ptr p = { ptr, 123 }; > bpf_whatever_helper(&p, ...); > > > bpf_whatever_helper() will specify that the first argument has to be > PTR_TO_BTF_ID where btf_id corresponds to struct bpf_mem_ptr. Hope > this helps.
On Fri, Dec 18, 2020 at 4:20 AM Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote: > As far as 6 arg issue: > long bpf_snprintf(const char *out, u32 out_size, > const char *fmt, u32 fmt_size, > const void *data, u32 data_len); > Yeah. It won't work as-is, but fmt_size is unnecessary nowadays. > The verifier understands read-only data. > Hence the helper can be: > long bpf_snprintf(const char *out, u32 out_size, > const char *fmt, > const void *data, u32 data_len); > The 3rd arg cannot be ARG_PTR_TO_MEM. > Instead we can introduce ARG_PTR_TO_CONST_STR in the verifier. > See check_mem_access() where it's doing bpf_map_direct_read(). > That 'fmt' string will be accessed through the same bpf_map_direct_read(). > The verifier would need to check that it's NUL-terminated valid string. Ok, this works for me. > It should probably do % specifier checks at the same time. However, I'm still not sure whether that would work. Did you maybe miss my comment in a previous email? Let me put it back here: > The iteration that bpf_trace_printk does over the format string > argument is not only used for validation. It is also used to remember > what extra operations need to be done based on the modifier types. For > example, it remembers whether an arg should be interpreted as 32bits or > 64bits. In the case of string printing, it also remembers whether it is > a kernel-space or user-space pointer so that bpf_trace_copy_string can > be called with the right arg. If we were to run the iteration over the format > string in the verifier, how would you recommend that we > "remember" the modifier type until the helper gets called ? The best solution I can think of would be to iterate over the format string in the helper. In that case, the format string verification in the verifier would be redundant and the format string wouldn't have to be constant. Do you have any suggestions ? 
> At the end bpf_snprintf() will have 5 args and when wrapped with > BPF_SNPRINTF() macro it will accept arbitrary number of arguments to print. > It also will be generally useful to do all other kinds of pretty printing. Yep, this macro is a good idea; I like that. :)
On Tue, Dec 22, 2020 at 09:17:41PM +0100, Florent Revest wrote: > On Tue, Dec 22, 2020 at 3:18 PM Christoph Hellwig <hch@infradead.org> wrote: > > > > FYI, there is a reason why kallsyms_lookup is not exported any more. > > I don't think adding that back through a backdoor is a good idea. > > Did you maybe mean kallsyms_lookup_name (the one that looks an address > up based on a symbol name) ? It used to be exported but isn't anymore > indeed. > However, this is not what we're trying to do. As far as I can tell, > kallsyms_lookup (the one that looks a symbol name up based on an > address) has never been exported but its close cousins sprint_symbol > and sprint_symbol_no_offset (which only call kallsyms_lookup and > pretty print the result) are still exported, they are also used by > vsprintf. Is this an issue ? Indeed, I was thinking of kallsyms_lookup_name. Let me take another look at the patch, but kallsyms_lookup still seems like a very low-level function to export to arbitrary eBPF programs.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c3458ec1f30a..670998635eac 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3817,6 +3817,21 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if * invalid arguments are passed. + * + * long bpf_kallsyms_lookup(u64 address, char *symbol, u32 symbol_size, char *module, u32 module_size) + * Description + * Uses kallsyms to write the name of the symbol at *address* + * into *symbol* of size *symbol_size*. This is guaranteed to be + * zero-terminated. + * If the symbol is in a module, up to *module_size* bytes of + * the module name are written in *module*. This is also + * guaranteed to be zero-terminated. Note: a module name + * is always shorter than 64 bytes. + * Return + * On success, the strictly positive length of the full symbol + * name, including the trailing zero byte. If this is greater than + * *symbol_size*, the written symbol is truncated. + * On error, a negative value. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3981,6 +3996,7 @@ union bpf_attr { FN(bprm_opts_set), \ FN(ktime_get_coarse_ns), \ FN(ima_inode_hash), \ + FN(kallsyms_lookup), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d255bc9b2bfa..9d86e20c2b13 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -17,6 +17,7 @@ #include <linux/error-injection.h> #include <linux/btf_ids.h> #include <linux/bpf_lsm.h> +#include <linux/kallsyms.h> #include <net/bpf_sk_storage.h> @@ -1260,6 +1261,44 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_5(bpf_kallsyms_lookup, u64, address, char *, symbol, u32, symbol_size, + char *, module, u32, module_size) +{ + char buffer[KSYM_SYMBOL_LEN]; + unsigned long offset, size; + const char *name; + char *modname; + long ret; + + name = kallsyms_lookup(address, &size, &offset, &modname, buffer); + if (!name) + return -EINVAL; + + ret = strlen(name) + 1; + if (symbol_size) { + strncpy(symbol, name, symbol_size); + symbol[symbol_size - 1] = '\0'; + } + + if (modname && module_size) { + strncpy(module, modname, module_size); + module[module_size - 1] = '\0'; + } + + return ret; +} + +const struct bpf_func_proto bpf_kallsyms_lookup_proto = { + .func = bpf_kallsyms_lookup, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1356,6 +1395,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_per_cpu_ptr_proto; case BPF_FUNC_bpf_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; + case BPF_FUNC_kallsyms_lookup: + return &bpf_kallsyms_lookup_proto; default: return NULL; } 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c3458ec1f30a..670998635eac 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3817,6 +3817,21 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if * invalid arguments are passed. + * + * long bpf_kallsyms_lookup(u64 address, char *symbol, u32 symbol_size, char *module, u32 module_size) + * Description + * Uses kallsyms to write the name of the symbol at *address* + * into *symbol* of size *symbol_size*. This is guaranteed to be + * zero-terminated. + * If the symbol is in a module, up to *module_size* bytes of + * the module name are written in *module*. This is also + * guaranteed to be zero-terminated. Note: a module name + * is always shorter than 64 bytes. + * Return + * On success, the strictly positive length of the full symbol + * name, including the trailing zero byte. If this is greater than + * *symbol_size*, the written symbol is truncated. + * On error, a negative value. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3981,6 +3996,7 @@ union bpf_attr { FN(bprm_opts_set), \ FN(ktime_get_coarse_ns), \ FN(ima_inode_hash), \ + FN(kallsyms_lookup), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper
This helper exposes the kallsyms_lookup function to eBPF tracing programs. This can be used to retrieve the name of the symbol at an address. For example, when hooking into nf_register_net_hook, one can audit the name of the registered netfilter hook and potentially also the name of the module in which the symbol is located. Signed-off-by: Florent Revest <revest@google.com> --- include/uapi/linux/bpf.h | 16 +++++++++++++ kernel/trace/bpf_trace.c | 41 ++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 16 +++++++++++++ 3 files changed, 73 insertions(+)