diff mbox series

libbpf: kprobe.multi: Filter with blacklist and available_filter_functions

Message ID 20230523132547.94384-1-liu.yun@linux.dev (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series libbpf: kprobe.multi: Filter with blacklist and available_filter_functions | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-6 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on s390x with gcc

Commit Message

Jackie Liu May 23, 2023, 1:25 p.m. UTC
From: Jackie Liu <liuyun01@kylinos.cn>

When attaching with a "kprobe multi" pattern, libbpf scans "/proc/kallsyms"
for all the functions that match. However, not all of them can be traced
by kprobe.multi, and if any one of the functions fails to be traced, the
attachment fails for all functions. The best approach is to filter out the
functions that cannot be traced, so that the remaining functions are
traced properly.

However, with these checks every matched function is looked up in
"available_filter_functions" and checked against kprobe's blacklist, so
startup may take longer. The implementation is based on BCC's
"kprobe_exists()".

Here is the test eBPF program [1].
[1] https://github.com/JackieLiu1/ketones/commit/a9e76d1ba57390e533b8b3eadde97f7a4535e867

Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
---
 tools/lib/bpf/libbpf.c | 47 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

Comments

Jiri Olsa May 23, 2023, 4:17 p.m. UTC | #1
On Tue, May 23, 2023 at 09:25:47PM +0800, Jackie Liu wrote:
> From: Jackie Liu <liuyun01@kylinos.cn>
> 
> When using regular expression matching with "kprobe multi", it scans all
> the functions under "/proc/kallsyms" that can be matched. However, not all
> of them can be traced by kprobe.multi. If any one of the functions fails
> to be traced, it will result in the failure of all functions. The best
> approach is to filter out the functions that cannot be traced to ensure
> proper tracking of the functions.
> 
> But, the addition of these checks will frequently probe whether a function
> complies with "available_filter_functions" and ensure that it has not been
> filtered by kprobe's blacklist. As a result, it may take a longer time
> during startup. The function implementation is referenced from BCC's
> "kprobe_exists()"
> 
> Here is the test eBPF program [1].
> [1] https://github.com/JackieLiu1/ketones/commit/a9e76d1ba57390e533b8b3eadde97f7a4535e867
> 
> Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
> ---
>  tools/lib/bpf/libbpf.c | 47 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 47 insertions(+)
> 
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index ad1ec893b41b..6a201267fa08 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -10421,6 +10421,50 @@ struct kprobe_multi_resolve {
>  	size_t cnt;
>  };
>  
> +static bool filter_available_function(const char *name)
> +{
> +	char addr_range[256];
> +	char sym_name[256];
> +	FILE *f;
> +	int ret;
> +
> +	f = fopen("/sys/kernel/debug/kprobes/blacklist", "r");
> +	if (!f)
> +		goto avail_filter;
> +
> +	while (true) {
> +		ret = fscanf(f, "%s %s%*[^\n]\n", addr_range, sym_name);
> +		if (ret == EOF && feof(f))
> +			break;
> +		if (ret != 2)
> +			break;
> +		if (!strcmp(name, sym_name)) {
> +			fclose(f);
> +			return false;
> +		}
> +	}
> +	fclose(f);

so available_filter_functions already contains all traceable symbols
for kprobe_multi/fprobe

kprobes/blacklist is kprobe-specific and does not apply to fprobe;
is there a crash when attaching a function from kprobes/blacklist?

> +
> +avail_filter:
> +	f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
> +	if (!f)
> +		return true;
> +
> +	while (true) {
> +		ret = fscanf(f, "%s%*[^\n]\n", sym_name);
> +		if (ret == EOF && feof(f))
> +			break;
> +		if (ret != 1)
> +			break;
> +		if (!strcmp(name, sym_name)) {
> +			fclose(f);
> +			return true;
> +		}
> +	}
> +	fclose(f);
> +	return false;
> +}
> +
>  static int
>  resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
>  			const char *sym_name, void *ctx)
> @@ -10431,6 +10475,9 @@ resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
>  	if (!glob_match(sym_name, res->pattern))
>  		return 0;
>  
> +	if (!filter_available_function(sym_name))
> +		return 0;

I think it'd be better to parse available_filter_functions directly
for kprobe_multi instead of filtering out kallsyms entries

we could add a libbpf_available_filter_functions_parse function with a
similar callback to go over the available_filter_functions file


jirka

> +
>  	err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long),
>  				res->cnt + 1);
>  	if (err)
> -- 
> 2.25.1
> 
>
Andrii Nakryiko May 23, 2023, 6:22 p.m. UTC | #2
On Tue, May 23, 2023 at 9:17 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Tue, May 23, 2023 at 09:25:47PM +0800, Jackie Liu wrote:
> > From: Jackie Liu <liuyun01@kylinos.cn>
> >
> > When using regular expression matching with "kprobe multi", it scans all
> > the functions under "/proc/kallsyms" that can be matched. However, not all
> > of them can be traced by kprobe.multi. If any one of the functions fails
> > to be traced, it will result in the failure of all functions. The best
> > approach is to filter out the functions that cannot be traced to ensure
> > proper tracking of the functions.
> >
> > But, the addition of these checks will frequently probe whether a function
> > complies with "available_filter_functions" and ensure that it has not been
> > filtered by kprobe's blacklist. As a result, it may take a longer time
> > during startup. The function implementation is referenced from BCC's
> > "kprobe_exists()"
> >
> > Here is the test eBPF program [1].
> > [1] https://github.com/JackieLiu1/ketones/commit/a9e76d1ba57390e533b8b3eadde97f7a4535e867
> >
> > Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
> > ---
> >  tools/lib/bpf/libbpf.c | 47 ++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 47 insertions(+)
> >
> > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> > index ad1ec893b41b..6a201267fa08 100644
> > --- a/tools/lib/bpf/libbpf.c
> > +++ b/tools/lib/bpf/libbpf.c
> > @@ -10421,6 +10421,50 @@ struct kprobe_multi_resolve {
> >       size_t cnt;
> >  };
> >
> > +static bool filter_available_function(const char *name)
> > +{
> > +     char addr_range[256];
> > +     char sym_name[256];
> > +     FILE *f;
> > +     int ret;
> > +
> > +     f = fopen("/sys/kernel/debug/kprobes/blacklist", "r");
> > +     if (!f)
> > +             goto avail_filter;
> > +
> > +     while (true) {
> > +             ret = fscanf(f, "%s %s%*[^\n]\n", addr_range, sym_name);
> > +             if (ret == EOF && feof(f))
> > +                     break;
> > +             if (ret != 2)
> > +                     break;
> > +             if (!strcmp(name, sym_name)) {
> > +                     fclose(f);
> > +                     return false;
> > +             }
> > +     }
> > +     fclose(f);
>
> so available_filter_functions already contains all traceable symbols
> for kprobe_multi/fprobe
>
> kprobes/blacklist is kprobe specific and does not apply to fprobe,
> is there a crash when attaching function from kprobes/blacklist ?
>
> > +
> > +avail_filter:
> > +     f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
> > +     if (!f)
> > +             return true;
> > +
> > +     while (true) {
> > +             ret = fscanf(f, "%s%*[^\n]\n", sym_name);
> > +             if (ret == EOF && feof(f))
> > +                     break;
> > +             if (ret != 1)
> > +                     break;
> > +             if (!strcmp(name, sym_name)) {
> > +                     fclose(f);
> > +                     return true;
> > +             }
> > +     }
> > +     fclose(f);
> > +     return false;
> > +}
> > +
> >  static int
> >  resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
> >                       const char *sym_name, void *ctx)
> > @@ -10431,6 +10475,9 @@ resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
> >       if (!glob_match(sym_name, res->pattern))
> >               return 0;
> >
> > +     if (!filter_available_function(sym_name))
> > +             return 0;
>
> I think it'd be better to parse available_filter_functions directly
> for kprobe_multi instead of filtering out kallsyms entries

yep, available_filter_functions should be cheaper to parse than
kallsyms. We can probably still fall back to kallsyms if
available_filter_functions is missing.

Furthermore, Steven and I chatted at LSFMM 2023 about having an
available_filter_functions-like file with kernel function addresses
(not just names), which would speed up attachment as well. It could be
useful in some other scenarios as well (e.g., I think retsnoop has to
join kallsyms and available_filter_functions). I think it's still a
good idea to add this new file, given that the kernel has all this
information readily available anyway.


>
> we could add libbpf_available_filter_functions_parse function with
> similar callback to go over available_filter_functions file

or iterator ;)

but either way, the current approach will do a linear scan for each matched
function, which is hugely inefficient, so it is definitely a no-go

>
>
> jirka
>
> > +
> >       err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long),
> >                               res->cnt + 1);
> >       if (err)
> > --
> > 2.25.1
> >
> >
Jackie Liu May 24, 2023, 1:03 a.m. UTC | #3
Hi Jiri.

在 2023/5/24 00:17, Jiri Olsa 写道:
> On Tue, May 23, 2023 at 09:25:47PM +0800, Jackie Liu wrote:
>> From: Jackie Liu <liuyun01@kylinos.cn>
>>
>> When using regular expression matching with "kprobe multi", it scans all
>> the functions under "/proc/kallsyms" that can be matched. However, not all
>> of them can be traced by kprobe.multi. If any one of the functions fails
>> to be traced, it will result in the failure of all functions. The best
>> approach is to filter out the functions that cannot be traced to ensure
>> proper tracking of the functions.
>>
>> But, the addition of these checks will frequently probe whether a function
>> complies with "available_filter_functions" and ensure that it has not been
>> filtered by kprobe's blacklist. As a result, it may take a longer time
>> during startup. The function implementation is referenced from BCC's
>> "kprobe_exists()"
>>
>> Here is the test eBPF program [1].
>> [1] https://github.com/JackieLiu1/ketones/commit/a9e76d1ba57390e533b8b3eadde97f7a4535e867
>>
>> Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
>> ---
>>   tools/lib/bpf/libbpf.c | 47 ++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 47 insertions(+)
>>
>> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
>> index ad1ec893b41b..6a201267fa08 100644
>> --- a/tools/lib/bpf/libbpf.c
>> +++ b/tools/lib/bpf/libbpf.c
>> @@ -10421,6 +10421,50 @@ struct kprobe_multi_resolve {
>>   	size_t cnt;
>>   };
>>   
>> +static bool filter_available_function(const char *name)
>> +{
>> +	char addr_range[256];
>> +	char sym_name[256];
>> +	FILE *f;
>> +	int ret;
>> +
>> +	f = fopen("/sys/kernel/debug/kprobes/blacklist", "r");
>> +	if (!f)
>> +		goto avail_filter;
>> +
>> +	while (true) {
>> +		ret = fscanf(f, "%s %s%*[^\n]\n", addr_range, sym_name);
>> +		if (ret == EOF && feof(f))
>> +			break;
>> +		if (ret != 2)
>> +			break;
>> +		if (!strcmp(name, sym_name)) {
>> +			fclose(f);
>> +			return false;
>> +		}
>> +	}
>> +	fclose(f);
> 
> so available_filter_functions already contains all traceable symbols
> for kprobe_multi/fprobe
> 
> kprobes/blacklist is kprobe specific and does not apply to fprobe,
> is there a crash when attaching function from kprobes/blacklist ?

No, I haven't seen a crash. I added this simply because BCC's
kprobe_exists() does it. I also don't think kprobes/blacklist affects
fprobe, so I will remove it.

> 
>> +
>> +avail_filter:
>> +	f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
>> +	if (!f)
>> +		return true;
>> +
>> +	while (true) {
>> +		ret = fscanf(f, "%s%*[^\n]\n", sym_name);
>> +		if (ret == EOF && feof(f))
>> +			break;
>> +		if (ret != 1)
>> +			break;
>> +		if (!strcmp(name, sym_name)) {
>> +			fclose(f);
>> +			return true;
>> +		}
>> +	}
>> +	fclose(f);
>> +	return false;
>> +}
>> +
>>   static int
>>   resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
>>   			const char *sym_name, void *ctx)
>> @@ -10431,6 +10475,9 @@ resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
>>   	if (!glob_match(sym_name, res->pattern))
>>   		return 0;
>>   
>> +	if (!filter_available_function(sym_name))
>> +		return 0;
> 
> I think it'd be better to parse available_filter_functions directly
> for kprobe_multi instead of filtering out kallsyms entries
> 
> we could add libbpf_available_filter_functions_parse function with
> similar callback to go over available_filter_functions file
> 

Sure; if available_filter_functions is not found, we fall back to /proc/kallsyms.
Jackie Liu May 24, 2023, 1:19 a.m. UTC | #4
Hi Jiri.

在 2023/5/24 09:03, Jackie Liu 写道:
> Hi Jiri.
> 
> 在 2023/5/24 00:17, Jiri Olsa 写道:
>> On Tue, May 23, 2023 at 09:25:47PM +0800, Jackie Liu wrote:
>>> From: Jackie Liu <liuyun01@kylinos.cn>
>>>
>>> When using regular expression matching with "kprobe multi", it scans all
>>> the functions under "/proc/kallsyms" that can be matched. However, 
>>> not all
>>> of them can be traced by kprobe.multi. If any one of the functions fails
>>> to be traced, it will result in the failure of all functions. The best
>>> approach is to filter out the functions that cannot be traced to ensure
>>> proper tracking of the functions.
>>>
>>> But, the addition of these checks will frequently probe whether a 
>>> function
>>> complies with "available_filter_functions" and ensure that it has not 
>>> been
>>> filtered by kprobe's blacklist. As a result, it may take a longer time
>>> during startup. The function implementation is referenced from BCC's
>>> "kprobe_exists()"
>>>
>>> Here is the test eBPF program [1].
>>> [1] 
>>> https://github.com/JackieLiu1/ketones/commit/a9e76d1ba57390e533b8b3eadde97f7a4535e867
>>>
>>> Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
>>> ---
>>>   tools/lib/bpf/libbpf.c | 47 ++++++++++++++++++++++++++++++++++++++++++
>>>   1 file changed, 47 insertions(+)
>>>
>>> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
>>> index ad1ec893b41b..6a201267fa08 100644
>>> --- a/tools/lib/bpf/libbpf.c
>>> +++ b/tools/lib/bpf/libbpf.c
>>> @@ -10421,6 +10421,50 @@ struct kprobe_multi_resolve {
>>>       size_t cnt;
>>>   };
>>> +static bool filter_available_function(const char *name)
>>> +{
>>> +    char addr_range[256];
>>> +    char sym_name[256];
>>> +    FILE *f;
>>> +    int ret;
>>> +
>>> +    f = fopen("/sys/kernel/debug/kprobes/blacklist", "r");
>>> +    if (!f)
>>> +        goto avail_filter;
>>> +
>>> +    while (true) {
>>> +        ret = fscanf(f, "%s %s%*[^\n]\n", addr_range, sym_name);
>>> +        if (ret == EOF && feof(f))
>>> +            break;
>>> +        if (ret != 2)
>>> +            break;
>>> +        if (!strcmp(name, sym_name)) {
>>> +            fclose(f);
>>> +            return false;
>>> +        }
>>> +    }
>>> +    fclose(f);
>>
>> so available_filter_functions already contains all traceable symbols
>> for kprobe_multi/fprobe
>>
>> kprobes/blacklist is kprobe specific and does not apply to fprobe,
>> is there a crash when attaching function from kprobes/blacklist ?
> 
> No, I haven't got crash before, Simply because BCC's kprobe_exists has
> implemented it so I added this, Yes, I also don't think 
> kprobes/blacklist will affect FPROBE, so I will remove it.
> 
>>
>>> +
>>> +avail_filter:
>>> +    f = 
>>> fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
>>> +    if (!f)
>>> +        return true;
>>> +
>>> +    while (true) {
>>> +        ret = fscanf(f, "%s%*[^\n]\n", sym_name);
>>> +        if (ret == EOF && feof(f))
>>> +            break;
>>> +        if (ret != 1)
>>> +            break;
>>> +        if (!strcmp(name, sym_name)) {
>>> +            fclose(f);
>>> +            return true;
>>> +        }
>>> +    }
>>> +    fclose(f);
>>> +    return false;
>>> +}
>>> +
>>>   static int
>>>   resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
>>>               const char *sym_name, void *ctx)
>>> @@ -10431,6 +10475,9 @@ resolve_kprobe_multi_cb(unsigned long long 
>>> sym_addr, char sym_type,
>>>       if (!glob_match(sym_name, res->pattern))
>>>           return 0;
>>> +    if (!filter_available_function(sym_name))
>>> +        return 0;
>>
>> I think it'd be better to parse available_filter_functions directly
>> for kprobe_multi instead of filtering out kallsyms entries
>>
>> we could add libbpf_available_filter_functions_parse function with
>> similar callback to go over available_filter_functions file
>>
> 
> Sure, if available_filter_functions not found, fallback to /proc/kallsyms.
> 

Um.

It is difficult to use available_filter_functions directly, because we
need not only the function name but also its address and other
information. However, we can indeed obtain the set of functions from
available_filter_functions first, and then obtain the function addresses
from /proc/kallsyms. That will be slightly faster than consulting
available_filter_functions afterwards, because when a function does not
exist in available_filter_functions, the entire file has to be read,
which takes a long time.

Of course, it would be better if the kernel directly provided an
available_filter_functions-like file containing function address
information.
Jiri Olsa May 24, 2023, 6:47 a.m. UTC | #5
On Wed, May 24, 2023 at 09:19:48AM +0800, Jackie Liu wrote:
> Hi Jiri.
> 
> 在 2023/5/24 09:03, Jackie Liu 写道:
> > Hi Jiri.
> > 
> > 在 2023/5/24 00:17, Jiri Olsa 写道:
> > > On Tue, May 23, 2023 at 09:25:47PM +0800, Jackie Liu wrote:
> > > > From: Jackie Liu <liuyun01@kylinos.cn>
> > > > 
> > > > When using regular expression matching with "kprobe multi", it scans all
> > > > the functions under "/proc/kallsyms" that can be matched.
> > > > However, not all
> > > > of them can be traced by kprobe.multi. If any one of the functions fails
> > > > to be traced, it will result in the failure of all functions. The best
> > > > approach is to filter out the functions that cannot be traced to ensure
> > > > proper tracking of the functions.
> > > > 
> > > > But, the addition of these checks will frequently probe whether
> > > > a function
> > > > complies with "available_filter_functions" and ensure that it
> > > > has not been
> > > > filtered by kprobe's blacklist. As a result, it may take a longer time
> > > > during startup. The function implementation is referenced from BCC's
> > > > "kprobe_exists()"
> > > > 
> > > > Here is the test eBPF program [1].
> > > > [1] https://github.com/JackieLiu1/ketones/commit/a9e76d1ba57390e533b8b3eadde97f7a4535e867
> > > > 
> > > > Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
> > > > ---
> > > >   tools/lib/bpf/libbpf.c | 47 ++++++++++++++++++++++++++++++++++++++++++
> > > >   1 file changed, 47 insertions(+)
> > > > 
> > > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> > > > index ad1ec893b41b..6a201267fa08 100644
> > > > --- a/tools/lib/bpf/libbpf.c
> > > > +++ b/tools/lib/bpf/libbpf.c
> > > > @@ -10421,6 +10421,50 @@ struct kprobe_multi_resolve {
> > > >       size_t cnt;
> > > >   };
> > > > +static bool filter_available_function(const char *name)
> > > > +{
> > > > +    char addr_range[256];
> > > > +    char sym_name[256];
> > > > +    FILE *f;
> > > > +    int ret;
> > > > +
> > > > +    f = fopen("/sys/kernel/debug/kprobes/blacklist", "r");
> > > > +    if (!f)
> > > > +        goto avail_filter;
> > > > +
> > > > +    while (true) {
> > > > +        ret = fscanf(f, "%s %s%*[^\n]\n", addr_range, sym_name);
> > > > +        if (ret == EOF && feof(f))
> > > > +            break;
> > > > +        if (ret != 2)
> > > > +            break;
> > > > +        if (!strcmp(name, sym_name)) {
> > > > +            fclose(f);
> > > > +            return false;
> > > > +        }
> > > > +    }
> > > > +    fclose(f);
> > > 
> > > so available_filter_functions already contains all traceable symbols
> > > for kprobe_multi/fprobe
> > > 
> > > kprobes/blacklist is kprobe specific and does not apply to fprobe,
> > > is there a crash when attaching function from kprobes/blacklist ?
> > 
> > No, I haven't got crash before, Simply because BCC's kprobe_exists has
> > implemented it so I added this, Yes, I also don't think
> > kprobes/blacklist will affect FPROBE, so I will remove it.
> > 
> > > 
> > > > +
> > > > +avail_filter:
> > > > +    f =
> > > > fopen("/sys/kernel/debug/tracing/available_filter_functions",
> > > > "r");
> > > > +    if (!f)
> > > > +        return true;
> > > > +
> > > > +    while (true) {
> > > > +        ret = fscanf(f, "%s%*[^\n]\n", sym_name);
> > > > +        if (ret == EOF && feof(f))
> > > > +            break;
> > > > +        if (ret != 1)
> > > > +            break;
> > > > +        if (!strcmp(name, sym_name)) {
> > > > +            fclose(f);
> > > > +            return true;
> > > > +        }
> > > > +    }
> > > > +    fclose(f);
> > > > +    return false;
> > > > +}
> > > > +
> > > >   static int
> > > >   resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
> > > >               const char *sym_name, void *ctx)
> > > > @@ -10431,6 +10475,9 @@ resolve_kprobe_multi_cb(unsigned long
> > > > long sym_addr, char sym_type,
> > > >       if (!glob_match(sym_name, res->pattern))
> > > >           return 0;
> > > > +    if (!filter_available_function(sym_name))
> > > > +        return 0;
> > > 
> > > I think it'd be better to parse available_filter_functions directly
> > > for kprobe_multi instead of filtering out kallsyms entries
> > > 
> > > we could add libbpf_available_filter_functions_parse function with
> > > similar callback to go over available_filter_functions file
> > > 
> > 
> > Sure, if available_filter_functions not found, fallback to /proc/kallsyms.
> > 
> 
> Um.
> 
> It is difficult to judge available_filter_functions directly, because we
> not only need the function name, but also obtain its address and other
> information, but we can indeed obtain the function set from
> available_filter_functions first, and then obtain the function address
> from /proc/kallsyms. which will be slightly faster than reading
> available_filter_functions later, because if this function does not
> exist in available_filter_functions, it will take a long time to read
> the entire file.
> 
> Of course, it would be better if the kernel directly provided an
> available_filter_functions -like file containing function address
> information.

you don't need to resolve symbols; you can pass just an array of symbols
to create the kprobe_multi link and they will get resolved in the kernel:

	struct bpf_link_create_opts {

			struct {
				__u32 flags;
				__u32 cnt;
		--->		const char **syms;
				const unsigned long *addrs;
				const __u64 *cookies;
			} kprobe_multi;
	}

I resolved the symbols in bpf_program__attach_kprobe_multi_opts mostly
because the address was available right away when parsing kallsyms,
but passing just the symbols for a pattern is fine

jirka
Jiri Olsa May 24, 2023, 7:03 a.m. UTC | #6
On Tue, May 23, 2023 at 11:22:46AM -0700, Andrii Nakryiko wrote:

SNIP

> > > +avail_filter:
> > > +     f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
> > > +     if (!f)
> > > +             return true;
> > > +
> > > +     while (true) {
> > > +             ret = fscanf(f, "%s%*[^\n]\n", sym_name);
> > > +             if (ret == EOF && feof(f))
> > > +                     break;
> > > +             if (ret != 1)
> > > +                     break;
> > > +             if (!strcmp(name, sym_name)) {
> > > +                     fclose(f);
> > > +                     return true;
> > > +             }
> > > +     }
> > > +     fclose(f);
> > > +     return false;
> > > +}
> > > +
> > >  static int
> > >  resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
> > >                       const char *sym_name, void *ctx)
> > > @@ -10431,6 +10475,9 @@ resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
> > >       if (!glob_match(sym_name, res->pattern))
> > >               return 0;
> > >
> > > +     if (!filter_available_function(sym_name))
> > > +             return 0;
> >
> > I think it'd be better to parse available_filter_functions directly
> > for kprobe_multi instead of filtering out kallsyms entries
> 
> yep, available_filter_functions should be cheaper to parse than
> kallsyms. We can probably fallback to kallsyms still, if
> available_filter_functions are missing.
> 
> Furthermore, me and Steven chatted at lsfmm2023 about having an
> available_filter_functions-like file with kernel function addresses
> (not just names), which would speed up attachment as well. It could be
> useful in some other scenarios as well (e.g., I think retsnoop has to
> join kallsyms and available_filter_functions). I think it's still a
> good idea to add this new file, given kernel has all this information
> readily available anyways.

yes, would be useful for this, and likely in other places

jirka

> 
> 
> >
> > we could add libbpf_available_filter_functions_parse function with
> > similar callback to go over available_filter_functions file
> 
> or iterator ;)
> 
> but either way, current approach will do linear scan for each matched
> function, which is hugely inefficient, so definitely a no go
> 
> >
> >
> > jirka
> >
> > > +
> > >       err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long),
> > >                               res->cnt + 1);
> > >       if (err)
> > > --
> > > 2.25.1
> > >
> > >
Jackie Liu May 24, 2023, 7:06 a.m. UTC | #7
Hi Jiri.

在 2023/5/24 14:47, Jiri Olsa 写道:
> On Wed, May 24, 2023 at 09:19:48AM +0800, Jackie Liu wrote:
>> Hi Jiri.
>>
>> 在 2023/5/24 09:03, Jackie Liu 写道:
>>> Hi Jiri.
>>>
>>> 在 2023/5/24 00:17, Jiri Olsa 写道:
>>>> On Tue, May 23, 2023 at 09:25:47PM +0800, Jackie Liu wrote:
>>>>> From: Jackie Liu <liuyun01@kylinos.cn>
>>>>>
>>>>> When using regular expression matching with "kprobe multi", it scans all
>>>>> the functions under "/proc/kallsyms" that can be matched.
>>>>> However, not all
>>>>> of them can be traced by kprobe.multi. If any one of the functions fails
>>>>> to be traced, it will result in the failure of all functions. The best
>>>>> approach is to filter out the functions that cannot be traced to ensure
>>>>> proper tracking of the functions.
>>>>>
>>>>> But, the addition of these checks will frequently probe whether
>>>>> a function
>>>>> complies with "available_filter_functions" and ensure that it
>>>>> has not been
>>>>> filtered by kprobe's blacklist. As a result, it may take a longer time
>>>>> during startup. The function implementation is referenced from BCC's
>>>>> "kprobe_exists()"
>>>>>
>>>>> Here is the test eBPF program [1].
>>>>> [1] https://github.com/JackieLiu1/ketones/commit/a9e76d1ba57390e533b8b3eadde97f7a4535e867
>>>>>
>>>>> Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
>>>>> ---
>>>>>    tools/lib/bpf/libbpf.c | 47 ++++++++++++++++++++++++++++++++++++++++++
>>>>>    1 file changed, 47 insertions(+)
>>>>>
>>>>> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
>>>>> index ad1ec893b41b..6a201267fa08 100644
>>>>> --- a/tools/lib/bpf/libbpf.c
>>>>> +++ b/tools/lib/bpf/libbpf.c
>>>>> @@ -10421,6 +10421,50 @@ struct kprobe_multi_resolve {
>>>>>        size_t cnt;
>>>>>    };
>>>>> +static bool filter_available_function(const char *name)
>>>>> +{
>>>>> +    char addr_range[256];
>>>>> +    char sym_name[256];
>>>>> +    FILE *f;
>>>>> +    int ret;
>>>>> +
>>>>> +    f = fopen("/sys/kernel/debug/kprobes/blacklist", "r");
>>>>> +    if (!f)
>>>>> +        goto avail_filter;
>>>>> +
>>>>> +    while (true) {
>>>>> +        ret = fscanf(f, "%s %s%*[^\n]\n", addr_range, sym_name);
>>>>> +        if (ret == EOF && feof(f))
>>>>> +            break;
>>>>> +        if (ret != 2)
>>>>> +            break;
>>>>> +        if (!strcmp(name, sym_name)) {
>>>>> +            fclose(f);
>>>>> +            return false;
>>>>> +        }
>>>>> +    }
>>>>> +    fclose(f);
>>>>
>>>> so available_filter_functions already contains all traceable symbols
>>>> for kprobe_multi/fprobe
>>>>
>>>> kprobes/blacklist is kprobe specific and does not apply to fprobe,
>>>> is there a crash when attaching function from kprobes/blacklist ?
>>>
>>> No, I haven't got crash before, Simply because BCC's kprobe_exists has
>>> implemented it so I added this, Yes, I also don't think
>>> kprobes/blacklist will affect FPROBE, so I will remove it.
>>>
>>>>
>>>>> +
>>>>> +avail_filter:
>>>>> +    f =
>>>>> fopen("/sys/kernel/debug/tracing/available_filter_functions",
>>>>> "r");
>>>>> +    if (!f)
>>>>> +        return true;
>>>>> +
>>>>> +    while (true) {
>>>>> +        ret = fscanf(f, "%s%*[^\n]\n", sym_name);
>>>>> +        if (ret == EOF && feof(f))
>>>>> +            break;
>>>>> +        if (ret != 1)
>>>>> +            break;
>>>>> +        if (!strcmp(name, sym_name)) {
>>>>> +            fclose(f);
>>>>> +            return true;
>>>>> +        }
>>>>> +    }
>>>>> +    fclose(f);
>>>>> +    return false;
>>>>> +}
>>>>> +
>>>>>    static int
>>>>>    resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
>>>>>                const char *sym_name, void *ctx)
>>>>> @@ -10431,6 +10475,9 @@ resolve_kprobe_multi_cb(unsigned long
>>>>> long sym_addr, char sym_type,
>>>>>        if (!glob_match(sym_name, res->pattern))
>>>>>            return 0;
>>>>> +    if (!filter_available_function(sym_name))
>>>>> +        return 0;
>>>>
>>>> I think it'd be better to parse available_filter_functions directly
>>>> for kprobe_multi instead of filtering out kallsyms entries
>>>>
>>>> we could add libbpf_available_filter_functions_parse function with
>>>> similar callback to go over available_filter_functions file
>>>>
>>>
>>> Sure, if available_filter_functions not found, fallback to /proc/kallsyms.
>>>
>>
>> Um.
>>
>> It is difficult to judge available_filter_functions directly, because we
>> not only need the function name, but also obtain its address and other
>> information, but we can indeed obtain the function set from
>> available_filter_functions first, and then obtain the function address
>> from /proc/kallsyms. which will be slightly faster than reading
>> available_filter_functions later, because if this function does not
>> exist in available_filter_functions, it will take a long time to read
>> the entire file.
>>
>> Of course, it would be better if the kernel directly provided an
>> available_filter_functions -like file containing function address
>> information.
> 
> you don't need to resolve symbols, you can pass just array of symbols
> to create kprobe_multi link and they will get resolved in kernel:
> 
> 	struct bpf_link_create_opts {
> 
> 			struct {
> 				__u32 flags;
> 				__u32 cnt;
> 		--->		const char **syms;
> 				const unsigned long *addrs;
> 				const __u64 *cookies;
> 			} kprobe_multi;
> 	}
> 
> I resolved the symbols in bpf_program__attach_kprobe_multi_opts mostly
> because the address was available right away when parsing kallsyms,
> but passing just symbols for pattern is fine

I see, let me try. Thanks.
diff mbox series

Patch

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ad1ec893b41b..6a201267fa08 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -10421,6 +10421,50 @@  struct kprobe_multi_resolve {
 	size_t cnt;
 };
 
+static bool filter_available_function(const char *name)
+{
+	char addr_range[256];
+	char sym_name[256];
+	FILE *f;
+	int ret;
+
+	f = fopen("/sys/kernel/debug/kprobes/blacklist", "r");
+	if (!f)
+		goto avail_filter;
+
+	while (true) {
+		ret = fscanf(f, "%s %s%*[^\n]\n", addr_range, sym_name);
+		if (ret == EOF && feof(f))
+			break;
+		if (ret != 2)
+			break;
+		if (!strcmp(name, sym_name)) {
+			fclose(f);
+			return false;
+		}
+	}
+	fclose(f);
+
+avail_filter:
+	f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
+	if (!f)
+		return true;
+
+	while (true) {
+		ret = fscanf(f, "%s%*[^\n]\n", sym_name);
+		if (ret == EOF && feof(f))
+			break;
+		if (ret != 1)
+			break;
+		if (!strcmp(name, sym_name)) {
+			fclose(f);
+			return true;
+		}
+	}
+	fclose(f);
+	return false;
+}
+
 static int
 resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
 			const char *sym_name, void *ctx)
@@ -10431,6 +10475,9 @@  resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
 	if (!glob_match(sym_name, res->pattern))
 		return 0;
 
+	if (!filter_available_function(sym_name))
+		return 0;
+
 	err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long),
 				res->cnt + 1);
 	if (err)