diff mbox series

[bpf-next,v2,1/2] bpf: Helper script for running BPF presubmit tests

Message ID 20210123004445.299149-2-kpsingh@kernel.org (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series BPF selftest helper script | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 7 maintainers not CCed: shuah@kernel.org netdev@vger.kernel.org songliubraving@fb.com linux-kselftest@vger.kernel.org john.fastabend@gmail.com kafai@fb.com yhs@fb.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

KP Singh Jan. 23, 2021, 12:44 a.m. UTC
The script runs the BPF selftests locally on the same kernel image
as they would run post submit in the BPF continuous integration
framework.

The goal of the script is to allow contributors to run selftests locally
in the same environment to check if their changes would end up breaking
the BPF CI and reduce the back-and-forth between the maintainers and the
developers.

Signed-off-by: KP Singh <kpsingh@kernel.org>
---
 tools/testing/selftests/bpf/run_in_vm.sh | 353 +++++++++++++++++++++++
 1 file changed, 353 insertions(+)
 create mode 100755 tools/testing/selftests/bpf/run_in_vm.sh

Comments

Yonghong Song Jan. 24, 2021, 7:06 p.m. UTC | #1
On 1/22/21 4:44 PM, KP Singh wrote:
> The script runs the BPF selftests locally on the same kernel image
> as they would run post submit in the BPF continuous integration
> framework.
> 
> The goal of the script is to allow contributors to run selftests locally
> in the same environment to check if their changes would end up breaking
> the BPF CI and reduce the back-and-forth between the maintainers and the
> developers.
> 
> Signed-off-by: KP Singh <kpsingh@kernel.org>

Thanks! I tried the script, and it works great.

Tested-by: Yonghong Song <yhs@fb.com>

When I tried to apply the patch locally, I see the following warnings:
-bash-4.4$ git apply ~/p1.txt
/home/yhs/p1.txt:306: space before tab in indent.
                 : )
/home/yhs/p1.txt:307: space before tab in indent.
                         echo "Invalid Option: -$OPTARG requires an 
argument"
warning: 2 lines add whitespace errors.

Maybe you want to fix them.

One issue I found with the following script,
KBUILD_OUTPUT=/home/yhs/work/linux-bld/ 
tools/testing/selftests/bpf/run_in_vm.sh -- cat /sys/fs/bpf/progs.debug
I see the following warning:

[    1.081000] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 
101, name: cat
[    1.081684] 3 locks held by cat/101: 

[    1.082032]  #0: ffff8880047770a0 (&p->lock){+.+.}-{3:3}, at: 
bpf_seq_read+0x3a/0x3d0
[    1.082734]  #1: ffffffff82d69800 (rcu_read_lock){....}-{1:2}, at: 
bpf_iter_run_prog+0x5/0x160
[    1.083521]  #2: ffff88800618c148 (&mm->mmap_lock#2){++++}-{3:3}, at: 
exc_page_fault+0x1a1/0x640
[    1.084344] Preemption disabled at: 

[    1.084346] [<ffffffff8108f913>] migrate_disable+0x33/0x80 

[    1.085207] CPU: 2 PID: 101 Comm: cat Not tainted 
5.11.0-rc4-00524-g6e66fbb10597-dirty #1257
[    1.085933] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 
BIOS 1.9.3-1.el7.centos 04/01
/2014 

[    1.086747] Call Trace: 

[    1.086961]  dump_stack+0x77/0x97 

[    1.087294]  ___might_sleep.cold.119+0xf2/0x106 

[    1.087702]  exc_page_fault+0x1c1/0x640 

[    1.088056]  asm_exc_page_fault+0x1e/0x30 

[    1.088413] RIP: 
0010:bpf_prog_0a182df2d34af188_dump_bpf_prog+0xf5/0xbc8 

[    1.089009] Code: 00 00 8b 7d f4 41 8b 76 44 48 39 f7 73 06 48 01 fb 
49 89 df 4c 89 7d d8 49 8b
bd 20 01 00 00 48 89 7d e0 49 8b bd e0 00 00 00 <48> 8b 7f 20 48 01 d7 
48 89 7d e8 48 89 e9 48 83 c
1 d0 48 8b 7d c8 

[    1.090635] RSP: 0018:ffffc90000197dc8 EFLAGS: 00010282 

[    1.091100] RAX: 0000000000000000 RBX: ffff888005a60458 RCX: 
0000000000000024
[    1.091727] RDX: 00000000000002f0 RSI: 0000000000000509 RDI: 
0000000000000000
[    1.092384] RBP: ffffc90000197e20 R08: 0000000000000001 R09: 
0000000000000000
[    1.093014] R10: 0000000000000002 R11: 0000000000000000 R12: 
0000000000020000
[    1.093660] R13: ffff888006199800 R14: ffff88800474c480 R15: 
ffff888005a60458
[    1.094314]  ? bpf_prog_0a182df2d34af188_dump_bpf_prog+0xc8/0xbc8
[    1.094871]  bpf_iter_run_prog+0x75/0x160
[    1.095231]  __bpf_prog_seq_show+0x39/0x40
[    1.095602]  bpf_seq_read+0xf6/0x3d0
[    1.095915]  vfs_read+0xa3/0x1b0
[    1.096226]  ksys_read+0x4f/0xc0
[    1.096527]  do_syscall_64+0x2d/0x40
[    1.096831]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[    1.097287] RIP: 0033:0x7f13a43e3ec2
[    1.097625] Code: c0 e9 b2 fe ff ff 50 48 8d 3d aa 36 0a 00 e8 65 eb 
01 00 0f 1f 44 00 00 f3 0f
1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 
56 c3 0f 1f 44 00 00 48 83 e
c 28 48 89 54 24
[    1.099232] RSP: 002b:00007fffed256bb8 EFLAGS: 00000246 ORIG_RAX: 
0000000000000000
[    1.099922] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 
00007f13a43e3ec2
[    1.100576] RDX: 0000000000020000 RSI: 00007f13a42d0000 RDI: 
0000000000000003
[    1.101197] RBP: 00007f13a42d0000 R08: 00007f13a42cf010 R09: 
0000000000000000
[    1.101868] R10: 0000000000000022 R11: 0000000000000246 R12: 
0000561599794c00
[    1.102486] R13: 0000000000000003 R14: 0000000000020000 R15: 
0000000000020000

Note that above `cat` is called during /sbin/init init process.
......
[    0.964879] Run /sbin/init as init process 

starting pid 84, tty '': '/etc/init.d/rcS'
......

I checked the assembly code and the above error info and the reason
is due to an exception (address 0) happens in bpf_prog iterator.

SEC("iter/bpf_prog")
int dump_bpf_prog(struct bpf_iter__bpf_prog *ctx)
{
         struct seq_file *seq = ctx->meta->seq;
         __u64 seq_num = ctx->meta->seq_num;
         struct bpf_prog *prog = ctx->prog;
         struct bpf_prog_aux *aux;

         if (!prog)
                 return 0;

         aux = prog->aux;
         if (seq_num == 0)
                 BPF_SEQ_PRINTF(seq, "  id name             attached\n");

         BPF_SEQ_PRINTF(seq, "%4u %-16s %s %s\n", aux->id,
                        get_name(aux->btf, aux->func_info[0].type_id, 
aux->name),
                        aux->attach_func_name, aux->dst_prog->aux->name);
         return 0;
}

In the above, aux->dst_prog == 0 and exception does not get caught 
properly and kernel complains. This might be due to
ths `cat /sys/fs/bpf/progs.debug` is called too early (in init process)
and something is not set up properly yet.

In a different rootfs, I called `cat /sys/fs/bpf/progs.debug` after
login prompt, and I did not see the error.

If somebody knows what is the possible reason, that will be great.
Otherwise, I will continue to debug this later.

> ---
>   tools/testing/selftests/bpf/run_in_vm.sh | 353 +++++++++++++++++++++++
>   1 file changed, 353 insertions(+)
>   create mode 100755 tools/testing/selftests/bpf/run_in_vm.sh
> 
> diff --git a/tools/testing/selftests/bpf/run_in_vm.sh b/tools/testing/selftests/bpf/run_in_vm.sh
> new file mode 100755
> index 000000000000..09bb9705acb3
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/run_in_vm.sh
> @@ -0,0 +1,353 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +
> +set -u
> +set -e
> +
> +QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}"
> +X86_BZIMAGE="arch/x86/boot/bzImage"
> +DEFAULT_COMMAND="./test_progs"
> +MOUNT_DIR="mnt"
> +ROOTFS_IMAGE="root.img"
> +OUTPUT_DIR="$HOME/.bpf_selftests"
> +KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config "
> +INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX "
> +NUM_COMPILE_JOBS="$(nproc)"
> +
> +usage()
> +{
> +	cat <<EOF
> +Usage: $0 [-k] [-i] [-d <output_dir>] -- [<command>]
> +
> +<command> is the command you would normally run when you are in
> +tools/testing/selftests/bpf. e.g:
> +
> +	$0 -- ./test_progs -t test_lsm
> +
> +If no command is specified, "${DEFAULT_COMMAND}" will be run by
> +default.
> +
> +If you build your kernel using KBUILD_OUTPUT= or O= options, these
> +can be passed as environment variables to the script:
> +
> +  O=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
> +
> +or
> +
> +  KBUILD_OUTPUT=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
> +
> +Options:
> +
> +	-k)		"Keep the kernel", i.e. don't recompile the kernel if it exists.
> +	-i)		Update the rootfs image with a newer version.
> +	-d)		Update the output directory (default: ${OUTPUT_DIR})
> +	-j)		Number of jobs for compilation, similar to -j in make
> +			(default: ${NUM_COMPILE_JOBS})
> +EOF
> +}
> +
[...]
Yonghong Song Jan. 25, 2021, 5:21 a.m. UTC | #2
On 1/24/21 11:06 AM, Yonghong Song wrote:
> 
> 
> On 1/22/21 4:44 PM, KP Singh wrote:
>> The script runs the BPF selftests locally on the same kernel image
>> as they would run post submit in the BPF continuous integration
>> framework.
>>
>> The goal of the script is to allow contributors to run selftests locally
>> in the same environment to check if their changes would end up breaking
>> the BPF CI and reduce the back-and-forth between the maintainers and the
>> developers.
>>
>> Signed-off-by: KP Singh <kpsingh@kernel.org>
> 
> Thanks! I tried the script, and it works great.
> 
> Tested-by: Yonghong Song <yhs@fb.com>
> 
> When I tried to apply the patch locally, I see the following warnings:
> -bash-4.4$ git apply ~/p1.txt
> /home/yhs/p1.txt:306: space before tab in indent.
>                  : )
> /home/yhs/p1.txt:307: space before tab in indent.
>                          echo "Invalid Option: -$OPTARG requires an 
> argument"
> warning: 2 lines add whitespace errors.
> 
> Maybe you want to fix them.
> 
> One issue I found with the following script,
> KBUILD_OUTPUT=/home/yhs/work/linux-bld/ 
> tools/testing/selftests/bpf/run_in_vm.sh -- cat /sys/fs/bpf/progs.debug
> I see the following warning:
> 
> [    1.081000] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 
> 101, name: cat
> [    1.081684] 3 locks held by cat/101:
> [    1.082032]  #0: ffff8880047770a0 (&p->lock){+.+.}-{3:3}, at: 
> bpf_seq_read+0x3a/0x3d0
> [    1.082734]  #1: ffffffff82d69800 (rcu_read_lock){....}-{1:2}, at: 
> bpf_iter_run_prog+0x5/0x160
> [    1.083521]  #2: ffff88800618c148 (&mm->mmap_lock#2){++++}-{3:3}, at: 
> exc_page_fault+0x1a1/0x640
> [    1.084344] Preemption disabled at:
> [    1.084346] [<ffffffff8108f913>] migrate_disable+0x33/0x80
> [    1.085207] CPU: 2 PID: 101 Comm: cat Not tainted 
> 5.11.0-rc4-00524-g6e66fbb10597-dirty #1257
> [    1.085933] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 
> BIOS 1.9.3-1.el7.centos 04/01
> /2014
> [    1.086747] Call Trace:
> [    1.086961]  dump_stack+0x77/0x97
> [    1.087294]  ___might_sleep.cold.119+0xf2/0x106
> [    1.087702]  exc_page_fault+0x1c1/0x640
> [    1.088056]  asm_exc_page_fault+0x1e/0x30
> [    1.088413] RIP: 0010:bpf_prog_0a182df2d34af188_dump_bpf_prog+0xf5/0xbc8
> [    1.089009] Code: 00 00 8b 7d f4 41 8b 76 44 48 39 f7 73 06 48 01 fb 
> 49 89 df 4c 89 7d d8 49 8b
> bd 20 01 00 00 48 89 7d e0 49 8b bd e0 00 00 00 <48> 8b 7f 20 48 01 d7 
> 48 89 7d e8 48 89 e9 48 83 c
> 1 d0 48 8b 7d c8
> [    1.090635] RSP: 0018:ffffc90000197dc8 EFLAGS: 00010282
> [    1.091100] RAX: 0000000000000000 RBX: ffff888005a60458 RCX: 
> 0000000000000024
> [    1.091727] RDX: 00000000000002f0 RSI: 0000000000000509 RDI: 
> 0000000000000000
> [    1.092384] RBP: ffffc90000197e20 R08: 0000000000000001 R09: 
> 0000000000000000
> [    1.093014] R10: 0000000000000002 R11: 0000000000000000 R12: 
> 0000000000020000
> [    1.093660] R13: ffff888006199800 R14: ffff88800474c480 R15: 
> ffff888005a60458
> [    1.094314]  ? bpf_prog_0a182df2d34af188_dump_bpf_prog+0xc8/0xbc8
> [    1.094871]  bpf_iter_run_prog+0x75/0x160
> [    1.095231]  __bpf_prog_seq_show+0x39/0x40
> [    1.095602]  bpf_seq_read+0xf6/0x3d0
> [    1.095915]  vfs_read+0xa3/0x1b0
> [    1.096226]  ksys_read+0x4f/0xc0
> [    1.096527]  do_syscall_64+0x2d/0x40
> [    1.096831]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [    1.097287] RIP: 0033:0x7f13a43e3ec2
> [    1.097625] Code: c0 e9 b2 fe ff ff 50 48 8d 3d aa 36 0a 00 e8 65 eb 
> 01 00 0f 1f 44 00 00 f3 0f
> 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 
> 56 c3 0f 1f 44 00 00 48 83 e
> c 28 48 89 54 24
> [    1.099232] RSP: 002b:00007fffed256bb8 EFLAGS: 00000246 ORIG_RAX: 
> 0000000000000000
> [    1.099922] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 
> 00007f13a43e3ec2
> [    1.100576] RDX: 0000000000020000 RSI: 00007f13a42d0000 RDI: 
> 0000000000000003
> [    1.101197] RBP: 00007f13a42d0000 R08: 00007f13a42cf010 R09: 
> 0000000000000000
> [    1.101868] R10: 0000000000000022 R11: 0000000000000246 R12: 
> 0000561599794c00
> [    1.102486] R13: 0000000000000003 R14: 0000000000020000 R15: 
> 0000000000020000
> 
> Note that above `cat` is called during /sbin/init init process.
> ......
> [    0.964879] Run /sbin/init as init process
> starting pid 84, tty '': '/etc/init.d/rcS'
> ......
> 
> I checked the assembly code and the above error info and the reason
> is due to an exception (address 0) happens in bpf_prog iterator.
> 
> SEC("iter/bpf_prog")
> int dump_bpf_prog(struct bpf_iter__bpf_prog *ctx)
> {
>          struct seq_file *seq = ctx->meta->seq;
>          __u64 seq_num = ctx->meta->seq_num;
>          struct bpf_prog *prog = ctx->prog;
>          struct bpf_prog_aux *aux;
> 
>          if (!prog)
>                  return 0;
> 
>          aux = prog->aux;
>          if (seq_num == 0)
>                  BPF_SEQ_PRINTF(seq, "  id name             attached\n");
> 
>          BPF_SEQ_PRINTF(seq, "%4u %-16s %s %s\n", aux->id,
>                         get_name(aux->btf, aux->func_info[0].type_id, 
> aux->name),
>                         aux->attach_func_name, aux->dst_prog->aux->name);
>          return 0;
> }
> 
> In the above, aux->dst_prog == 0 and exception does not get caught 
> properly and kernel complains. This might be due to
> ths `cat /sys/fs/bpf/progs.debug` is called too early (in init process)
> and something is not set up properly yet.
> 
> In a different rootfs, I called `cat /sys/fs/bpf/progs.debug` after
> login prompt, and I did not see the error.
> 
> If somebody knows what is the possible reason, that will be great.
> Otherwise, I will continue to debug this later.

I did some investigation and found the root cause.

In arch/x86/mm/fault.c, function do_user_addr_fault(),

The following if condition is false when /sys/fs/bpf/progs.debug is
run during init time and is true when it is run after login prompt.

         if (unlikely(cpu_feature_enabled(X86_FEATURE_SMAP) &&
                      !(hw_error_code & X86_PF_USER) &&
                      !(regs->flags & X86_EFLAGS_AC)))
         {
                 bad_area_nosemaphore(regs, hw_error_code, address);
                 return;
         }

Specifically, cpu_feature_enabled(X86_FEATURE_SMAP) is false when bpf 
program is run at /sbin/init time and is true after login prompt.

The false condition eventually leads the control to the following
code in do_user_addr_fault().

         if (unlikely(!mmap_read_trylock(mm))) {
                 if (!user_mode(regs) && 
!search_exception_tables(regs->ip)) {
                         /*
                          * Fault from code in kernel from
                          * which we do not expect faults.
                          */
                         bad_area_nosemaphore(regs, hw_error_code, address);
                         return;
                 }
retry:
                 mmap_read_lock(mm);
         } else {
                 /*
                  * The above down_read_trylock() might have succeeded in
                  * which case we'll have missed the might_sleep() from
                  * down_read():
                  */
                 might_sleep();
         }

and since mmap_read_trylock(mm) is successful with return value 1,
might_sleep() is called and hence the warning.

> 
>> ---
>>   tools/testing/selftests/bpf/run_in_vm.sh | 353 +++++++++++++++++++++++
>>   1 file changed, 353 insertions(+)
>>   create mode 100755 tools/testing/selftests/bpf/run_in_vm.sh
>>
>> diff --git a/tools/testing/selftests/bpf/run_in_vm.sh 
>> b/tools/testing/selftests/bpf/run_in_vm.sh
>> new file mode 100755
>> index 000000000000..09bb9705acb3
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/run_in_vm.sh
>> @@ -0,0 +1,353 @@
>> +#!/bin/bash
>> +# SPDX-License-Identifier: GPL-2.0
>> +
>> +set -u
>> +set -e
>> +
>> +QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}"
>> +X86_BZIMAGE="arch/x86/boot/bzImage"
>> +DEFAULT_COMMAND="./test_progs"
>> +MOUNT_DIR="mnt"
>> +ROOTFS_IMAGE="root.img"
>> +OUTPUT_DIR="$HOME/.bpf_selftests"
>> +KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config  
>> "
>> +INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX  
>> "
>> +NUM_COMPILE_JOBS="$(nproc)"
>> +
>> +usage()
>> +{
>> +    cat <<EOF
>> +Usage: $0 [-k] [-i] [-d <output_dir>] -- [<command>]
>> +
>> +<command> is the command you would normally run when you are in
>> +tools/testing/selftests/bpf. e.g:
>> +
>> +    $0 -- ./test_progs -t test_lsm
>> +
>> +If no command is specified, "${DEFAULT_COMMAND}" will be run by
>> +default.
>> +
>> +If you build your kernel using KBUILD_OUTPUT= or O= options, these
>> +can be passed as environment variables to the script:
>> +
>> +  O=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
>> +
>> +or
>> +
>> +  KBUILD_OUTPUT=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
>> +
>> +Options:
>> +
>> +    -k)        "Keep the kernel", i.e. don't recompile the kernel if 
>> it exists.
>> +    -i)        Update the rootfs image with a newer version.
>> +    -d)        Update the output directory (default: ${OUTPUT_DIR})
>> +    -j)        Number of jobs for compilation, similar to -j in make
>> +            (default: ${NUM_COMPILE_JOBS})
>> +EOF
>> +}
>> +
> [...]
KP Singh Jan. 25, 2021, 5:51 p.m. UTC | #3
On Sun, Jan 24, 2021 at 8:07 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 1/22/21 4:44 PM, KP Singh wrote:
> > The script runs the BPF selftests locally on the same kernel image
> > as they would run post submit in the BPF continuous integration
> > framework.
> >
> > The goal of the script is to allow contributors to run selftests locally
> > in the same environment to check if their changes would end up breaking
> > the BPF CI and reduce the back-and-forth between the maintainers and the
> > developers.
> >
> > Signed-off-by: KP Singh <kpsingh@kernel.org>
>
> Thanks! I tried the script, and it works great.
>
> Tested-by: Yonghong Song <yhs@fb.com>

Thanks!

>
> When I tried to apply the patch locally, I see the following warnings:
> -bash-4.4$ git apply ~/p1.txt
> /home/yhs/p1.txt:306: space before tab in indent.
>                  : )
> /home/yhs/p1.txt:307: space before tab in indent.
>                          echo "Invalid Option: -$OPTARG requires an
> argument"
> warning: 2 lines add whitespace errors.
>
> Maybe you want to fix them.

Sorry about that, fixed in the next revision.

>
> One issue I found with the following script,
> KBUILD_OUTPUT=/home/yhs/work/linux-bld/
> tools/testing/selftests/bpf/run_in_vm.sh -- cat /sys/fs/bpf/progs.debug
> I see the following warning:
>
> [    1.081000] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid:
> 101, name: cat
> [    1.081684] 3 locks held by cat/101:
>
> [    1.082032]  #0: ffff8880047770a0 (&p->lock){+.+.}-{3:3}, at:
> bpf_seq_read+0x3a/0x3d0
> [    1.082734]  #1: ffffffff82d69800 (rcu_read_lock){....}-{1:2}, at:
> bpf_iter_run_prog+0x5/0x160
> [    1.083521]  #2: ffff88800618c148 (&mm->mmap_lock#2){++++}-{3:3}, at:
> exc_page_fault+0x1a1/0x640
> [    1.084344] Preemption disabled at:
>
> [    1.084346] [<ffffffff8108f913>] migrate_disable+0x33/0x80
>
> [    1.085207] CPU: 2 PID: 101 Comm: cat Not tainted
> 5.11.0-rc4-00524-g6e66fbb10597-dirty #1257
> [    1.085933] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> BIOS 1.9.3-1.el7.centos 04/01
> /2014
>
> [    1.086747] Call Trace:
>
> [    1.086961]  dump_stack+0x77/0x97
>
> [    1.087294]  ___might_sleep.cold.119+0xf2/0x106
>
> [    1.087702]  exc_page_fault+0x1c1/0x640
>
> [    1.088056]  asm_exc_page_fault+0x1e/0x30
>
> [    1.088413] RIP:
> 0010:bpf_prog_0a182df2d34af188_dump_bpf_prog+0xf5/0xbc8
>
> [    1.089009] Code: 00 00 8b 7d f4 41 8b 76 44 48 39 f7 73 06 48 01 fb
> 49 89 df 4c 89 7d d8 49 8b
> bd 20 01 00 00 48 89 7d e0 49 8b bd e0 00 00 00 <48> 8b 7f 20 48 01 d7
> 48 89 7d e8 48 89 e9 48 83 c
> 1 d0 48 8b 7d c8
>
> [    1.090635] RSP: 0018:ffffc90000197dc8 EFLAGS: 00010282
>
> [    1.091100] RAX: 0000000000000000 RBX: ffff888005a60458 RCX:
> 0000000000000024
> [    1.091727] RDX: 00000000000002f0 RSI: 0000000000000509 RDI:
> 0000000000000000
> [    1.092384] RBP: ffffc90000197e20 R08: 0000000000000001 R09:
> 0000000000000000
> [    1.093014] R10: 0000000000000002 R11: 0000000000000000 R12:
> 0000000000020000
> [    1.093660] R13: ffff888006199800 R14: ffff88800474c480 R15:
> ffff888005a60458
> [    1.094314]  ? bpf_prog_0a182df2d34af188_dump_bpf_prog+0xc8/0xbc8
> [    1.094871]  bpf_iter_run_prog+0x75/0x160
> [    1.095231]  __bpf_prog_seq_show+0x39/0x40
> [    1.095602]  bpf_seq_read+0xf6/0x3d0
> [    1.095915]  vfs_read+0xa3/0x1b0
> [    1.096226]  ksys_read+0x4f/0xc0
> [    1.096527]  do_syscall_64+0x2d/0x40
> [    1.096831]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [    1.097287] RIP: 0033:0x7f13a43e3ec2
> [    1.097625] Code: c0 e9 b2 fe ff ff 50 48 8d 3d aa 36 0a 00 e8 65 eb
> 01 00 0f 1f 44 00 00 f3 0f
> 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77
> 56 c3 0f 1f 44 00 00 48 83 e
> c 28 48 89 54 24
> [    1.099232] RSP: 002b:00007fffed256bb8 EFLAGS: 00000246 ORIG_RAX:
> 0000000000000000
> [    1.099922] RAX: ffffffffffffffda RBX: 0000000000020000 RCX:
> 00007f13a43e3ec2
> [    1.100576] RDX: 0000000000020000 RSI: 00007f13a42d0000 RDI:
> 0000000000000003
> [    1.101197] RBP: 00007f13a42d0000 R08: 00007f13a42cf010 R09:
> 0000000000000000
> [    1.101868] R10: 0000000000000022 R11: 0000000000000246 R12:
> 0000561599794c00
> [    1.102486] R13: 0000000000000003 R14: 0000000000020000 R15:
> 0000000000020000
>
> Note that above `cat` is called during /sbin/init init process.
> ......
> [    0.964879] Run /sbin/init as init process
>
> starting pid 84, tty '': '/etc/init.d/rcS'
> ......
>
> I checked the assembly code and the above error info and the reason
> is due to an exception (address 0) happens in bpf_prog iterator.
>
> SEC("iter/bpf_prog")
> int dump_bpf_prog(struct bpf_iter__bpf_prog *ctx)
> {
>          struct seq_file *seq = ctx->meta->seq;
>          __u64 seq_num = ctx->meta->seq_num;
>          struct bpf_prog *prog = ctx->prog;
>          struct bpf_prog_aux *aux;
>
>          if (!prog)
>                  return 0;
>
>          aux = prog->aux;
>          if (seq_num == 0)
>                  BPF_SEQ_PRINTF(seq, "  id name             attached\n");
>
>          BPF_SEQ_PRINTF(seq, "%4u %-16s %s %s\n", aux->id,
>                         get_name(aux->btf, aux->func_info[0].type_id,
> aux->name),
>                         aux->attach_func_name, aux->dst_prog->aux->name);
>          return 0;
> }
>
> In the above, aux->dst_prog == 0 and exception does not get caught
> properly and kernel complains. This might be due to
> ths `cat /sys/fs/bpf/progs.debug` is called too early (in init process)
> and something is not set up properly yet.
>
> In a different rootfs, I called `cat /sys/fs/bpf/progs.debug` after
> login prompt, and I did not see the error.
>
> If somebody knows what is the possible reason, that will be great.
> Otherwise, I will continue to debug this later.
>
> > ---
> >   tools/testing/selftests/bpf/run_in_vm.sh | 353 +++++++++++++++++++++++
> >   1 file changed, 353 insertions(+)
> >   create mode 100755 tools/testing/selftests/bpf/run_in_vm.sh
> >
> > diff --git a/tools/testing/selftests/bpf/run_in_vm.sh b/tools/testing/selftests/bpf/run_in_vm.sh
> > new file mode 100755
> > index 000000000000..09bb9705acb3
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/run_in_vm.sh
> > @@ -0,0 +1,353 @@
> > +#!/bin/bash
> > +# SPDX-License-Identifier: GPL-2.0
> > +
> > +set -u
> > +set -e
> > +
> > +QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}"
> > +X86_BZIMAGE="arch/x86/boot/bzImage"
> > +DEFAULT_COMMAND="./test_progs"
> > +MOUNT_DIR="mnt"
> > +ROOTFS_IMAGE="root.img"
> > +OUTPUT_DIR="$HOME/.bpf_selftests"
> > +KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config "
> > +INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX "
> > +NUM_COMPILE_JOBS="$(nproc)"
> > +
> > +usage()
> > +{
> > +     cat <<EOF
> > +Usage: $0 [-k] [-i] [-d <output_dir>] -- [<command>]
> > +
> > +<command> is the command you would normally run when you are in
> > +tools/testing/selftests/bpf. e.g:
> > +
> > +     $0 -- ./test_progs -t test_lsm
> > +
> > +If no command is specified, "${DEFAULT_COMMAND}" will be run by
> > +default.
> > +
> > +If you build your kernel using KBUILD_OUTPUT= or O= options, these
> > +can be passed as environment variables to the script:
> > +
> > +  O=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
> > +
> > +or
> > +
> > +  KBUILD_OUTPUT=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
> > +
> > +Options:
> > +
> > +     -k)             "Keep the kernel", i.e. don't recompile the kernel if it exists.
> > +     -i)             Update the rootfs image with a newer version.
> > +     -d)             Update the output directory (default: ${OUTPUT_DIR})
> > +     -j)             Number of jobs for compilation, similar to -j in make
> > +                     (default: ${NUM_COMPILE_JOBS})
> > +EOF
> > +}
> > +
> [...]
KP Singh Jan. 25, 2021, 5:53 p.m. UTC | #4
On Mon, Jan 25, 2021 at 6:22 AM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 1/24/21 11:06 AM, Yonghong Song wrote:
> >
> >
> > On 1/22/21 4:44 PM, KP Singh wrote:
> >> The script runs the BPF selftests locally on the same kernel image
> >> as they would run post submit in the BPF continuous integration
> >> framework.
> >>
> >> The goal of the script is to allow contributors to run selftests locally
> >> in the same environment to check if their changes would end up breaking
> >> the BPF CI and reduce the back-and-forth between the maintainers and the
> >> developers.
> >>
> >> Signed-off-by: KP Singh <kpsingh@kernel.org>
> >
> > Thanks! I tried the script, and it works great.
> >
> > Tested-by: Yonghong Song <yhs@fb.com>
> >
> > When I tried to apply the patch locally, I see the following warnings:
> > -bash-4.4$ git apply ~/p1.txt
> > /home/yhs/p1.txt:306: space before tab in indent.
> >                  : )
> > /home/yhs/p1.txt:307: space before tab in indent.
> >                          echo "Invalid Option: -$OPTARG requires an
> > argument"
> > warning: 2 lines add whitespace errors.
> >
> > Maybe you want to fix them.
> >
> > One issue I found with the following script,
> > KBUILD_OUTPUT=/home/yhs/work/linux-bld/
> > tools/testing/selftests/bpf/run_in_vm.sh -- cat /sys/fs/bpf/progs.debug
> > I see the following warning:
> >
> > [    1.081000] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid:
> > 101, name: cat
> > [    1.081684] 3 locks held by cat/101:
> > [    1.082032]  #0: ffff8880047770a0 (&p->lock){+.+.}-{3:3}, at:
> > bpf_seq_read+0x3a/0x3d0
> > [    1.082734]  #1: ffffffff82d69800 (rcu_read_lock){....}-{1:2}, at:
> > bpf_iter_run_prog+0x5/0x160
> > [    1.083521]  #2: ffff88800618c148 (&mm->mmap_lock#2){++++}-{3:3}, at:
> > exc_page_fault+0x1a1/0x640
> > [    1.084344] Preemption disabled at:
> > [    1.084346] [<ffffffff8108f913>] migrate_disable+0x33/0x80
> > [    1.085207] CPU: 2 PID: 101 Comm: cat Not tainted
> > 5.11.0-rc4-00524-g6e66fbb10597-dirty #1257
> > [    1.085933] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> > BIOS 1.9.3-1.el7.centos 04/01
> > /2014
> > [    1.086747] Call Trace:
> > [    1.086961]  dump_stack+0x77/0x97
> > [    1.087294]  ___might_sleep.cold.119+0xf2/0x106
> > [    1.087702]  exc_page_fault+0x1c1/0x640
> > [    1.088056]  asm_exc_page_fault+0x1e/0x30
> > [    1.088413] RIP: 0010:bpf_prog_0a182df2d34af188_dump_bpf_prog+0xf5/0xbc8
> > [    1.089009] Code: 00 00 8b 7d f4 41 8b 76 44 48 39 f7 73 06 48 01 fb
> > 49 89 df 4c 89 7d d8 49 8b
> > bd 20 01 00 00 48 89 7d e0 49 8b bd e0 00 00 00 <48> 8b 7f 20 48 01 d7
> > 48 89 7d e8 48 89 e9 48 83 c
> > 1 d0 48 8b 7d c8
> > [    1.090635] RSP: 0018:ffffc90000197dc8 EFLAGS: 00010282
> > [    1.091100] RAX: 0000000000000000 RBX: ffff888005a60458 RCX:
> > 0000000000000024
> > [    1.091727] RDX: 00000000000002f0 RSI: 0000000000000509 RDI:
> > 0000000000000000
> > [    1.092384] RBP: ffffc90000197e20 R08: 0000000000000001 R09:
> > 0000000000000000
> > [    1.093014] R10: 0000000000000002 R11: 0000000000000000 R12:
> > 0000000000020000
> > [    1.093660] R13: ffff888006199800 R14: ffff88800474c480 R15:
> > ffff888005a60458
> > [    1.094314]  ? bpf_prog_0a182df2d34af188_dump_bpf_prog+0xc8/0xbc8
> > [    1.094871]  bpf_iter_run_prog+0x75/0x160
> > [    1.095231]  __bpf_prog_seq_show+0x39/0x40
> > [    1.095602]  bpf_seq_read+0xf6/0x3d0
> > [    1.095915]  vfs_read+0xa3/0x1b0
> > [    1.096226]  ksys_read+0x4f/0xc0
> > [    1.096527]  do_syscall_64+0x2d/0x40
> > [    1.096831]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> > [    1.097287] RIP: 0033:0x7f13a43e3ec2
> > [    1.097625] Code: c0 e9 b2 fe ff ff 50 48 8d 3d aa 36 0a 00 e8 65 eb
> > 01 00 0f 1f 44 00 00 f3 0f
> > 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77
> > 56 c3 0f 1f 44 00 00 48 83 e
> > c 28 48 89 54 24
> > [    1.099232] RSP: 002b:00007fffed256bb8 EFLAGS: 00000246 ORIG_RAX:
> > 0000000000000000
> > [    1.099922] RAX: ffffffffffffffda RBX: 0000000000020000 RCX:
> > 00007f13a43e3ec2
> > [    1.100576] RDX: 0000000000020000 RSI: 00007f13a42d0000 RDI:
> > 0000000000000003
> > [    1.101197] RBP: 00007f13a42d0000 R08: 00007f13a42cf010 R09:
> > 0000000000000000
> > [    1.101868] R10: 0000000000000022 R11: 0000000000000246 R12:
> > 0000561599794c00
> > [    1.102486] R13: 0000000000000003 R14: 0000000000020000 R15:
> > 0000000000020000
> >
> > Note that above `cat` is called during /sbin/init init process.
> > ......
> > [    0.964879] Run /sbin/init as init process
> > starting pid 84, tty '': '/etc/init.d/rcS'
> > ......
> >
> > I checked the assembly code and the above error info and the reason
> > is due to an exception (address 0) happens in bpf_prog iterator.
> >
> > SEC("iter/bpf_prog")
> > int dump_bpf_prog(struct bpf_iter__bpf_prog *ctx)
> > {
> >          struct seq_file *seq = ctx->meta->seq;
> >          __u64 seq_num = ctx->meta->seq_num;
> >          struct bpf_prog *prog = ctx->prog;
> >          struct bpf_prog_aux *aux;
> >
> >          if (!prog)
> >                  return 0;
> >
> >          aux = prog->aux;
> >          if (seq_num == 0)
> >                  BPF_SEQ_PRINTF(seq, "  id name             attached\n");
> >
> >          BPF_SEQ_PRINTF(seq, "%4u %-16s %s %s\n", aux->id,
> >                         get_name(aux->btf, aux->func_info[0].type_id,
> > aux->name),
> >                         aux->attach_func_name, aux->dst_prog->aux->name);
> >          return 0;
> > }
> >
> > In the above, aux->dst_prog == 0 and exception does not get caught
> > properly and kernel complains. This might be due to
> > ths `cat /sys/fs/bpf/progs.debug` is called too early (in init process)
> > and something is not set up properly yet.
> >
> > In a different rootfs, I called `cat /sys/fs/bpf/progs.debug` after
> > login prompt, and I did not see the error.
> >
> > If somebody knows what is the possible reason, that will be great.
> > Otherwise, I will continue to debug this later.
>
> I did some investigation and found the root cause.
>
> In arch/x86/mm/fault.c, function do_user_addr_fault(),
>
> The following if condition is false when /sys/fs/bpf/progs.debug is
> run during init time and is true when it is run after login prompt.
>
>          if (unlikely(cpu_feature_enabled(X86_FEATURE_SMAP) &&
>                       !(hw_error_code & X86_PF_USER) &&
>                       !(regs->flags & X86_EFLAGS_AC)))
>          {
>                  bad_area_nosemaphore(regs, hw_error_code, address);
>                  return;
>          }
>
> Specifically, cpu_feature_enabled(X86_FEATURE_SMAP) is false when bpf
> program is run at /sbin/init time and is true after login prompt.
>
> The false condition eventually leads the control to the following
> code in do_user_addr_fault().
>
>          if (unlikely(!mmap_read_trylock(mm))) {
>                  if (!user_mode(regs) &&
> !search_exception_tables(regs->ip)) {
>                          /*
>                           * Fault from code in kernel from
>                           * which we do not expect faults.
>                           */
>                          bad_area_nosemaphore(regs, hw_error_code, address);
>                          return;
>                  }
> retry:
>                  mmap_read_lock(mm);
>          } else {
>                  /*
>                   * The above down_read_trylock() might have succeeded in
>                   * which case we'll have missed the might_sleep() from
>                   * down_read():
>                   */
>                  might_sleep();
>          }
>
> and since mmap_read_trylock(mm) is successful with return value 1,
> might_sleep() is called and hence the warning.

Do you think this needs to be worked around in the script? If so, I would
prefer to do it in a separate patch so that we capture all the details.

- KP

>
> >

[...]

> >> +}
> >> +
> > [...]
Yonghong Song Jan. 25, 2021, 7:42 p.m. UTC | #5
On 1/25/21 9:53 AM, KP Singh wrote:
> On Mon, Jan 25, 2021 at 6:22 AM Yonghong Song <yhs@fb.com> wrote:
>>
>>
>>
>> On 1/24/21 11:06 AM, Yonghong Song wrote:
>>>
>>>
>>> On 1/22/21 4:44 PM, KP Singh wrote:
>>>> The script runs the BPF selftests locally on the same kernel image
>>>> as they would run post submit in the BPF continuous integration
>>>> framework.
>>>>
>>>> The goal of the script is to allow contributors to run selftests locally
>>>> in the same environment to check if their changes would end up breaking
>>>> the BPF CI and reduce the back-and-forth between the maintainers and the
>>>> developers.
>>>>
>>>> Signed-off-by: KP Singh <kpsingh@kernel.org>
>>>
>>> Thanks! I tried the script, and it works great.
>>>
>>> Tested-by: Yonghong Song <yhs@fb.com>
>>>
>>> When I tried to apply the patch locally, I see the following warnings:
>>> -bash-4.4$ git apply ~/p1.txt
>>> /home/yhs/p1.txt:306: space before tab in indent.
>>>                   : )
>>> /home/yhs/p1.txt:307: space before tab in indent.
>>>                           echo "Invalid Option: -$OPTARG requires an
>>> argument"
>>> warning: 2 lines add whitespace errors.
>>>
>>> Maybe you want to fix them.
>>>
>>> One issue I found with the following script,
>>> KBUILD_OUTPUT=/home/yhs/work/linux-bld/
>>> tools/testing/selftests/bpf/run_in_vm.sh -- cat /sys/fs/bpf/progs.debug
>>> I see the following warning:
>>>
>>> [    1.081000] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid:
>>> 101, name: cat
>>> [    1.081684] 3 locks held by cat/101:
>>> [    1.082032]  #0: ffff8880047770a0 (&p->lock){+.+.}-{3:3}, at:
>>> bpf_seq_read+0x3a/0x3d0
>>> [    1.082734]  #1: ffffffff82d69800 (rcu_read_lock){....}-{1:2}, at:
>>> bpf_iter_run_prog+0x5/0x160
>>> [    1.083521]  #2: ffff88800618c148 (&mm->mmap_lock#2){++++}-{3:3}, at:
>>> exc_page_fault+0x1a1/0x640
>>> [    1.084344] Preemption disabled at:
>>> [    1.084346] [<ffffffff8108f913>] migrate_disable+0x33/0x80
>>> [    1.085207] CPU: 2 PID: 101 Comm: cat Not tainted
>>> 5.11.0-rc4-00524-g6e66fbb10597-dirty #1257
>>> [    1.085933] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
>>> BIOS 1.9.3-1.el7.centos 04/01
>>> /2014
>>> [    1.086747] Call Trace:
>>> [    1.086961]  dump_stack+0x77/0x97
>>> [    1.087294]  ___might_sleep.cold.119+0xf2/0x106
>>> [    1.087702]  exc_page_fault+0x1c1/0x640
>>> [    1.088056]  asm_exc_page_fault+0x1e/0x30
>>> [    1.088413] RIP: 0010:bpf_prog_0a182df2d34af188_dump_bpf_prog+0xf5/0xbc8
>>> [    1.089009] Code: 00 00 8b 7d f4 41 8b 76 44 48 39 f7 73 06 48 01 fb
>>> 49 89 df 4c 89 7d d8 49 8b
>>> bd 20 01 00 00 48 89 7d e0 49 8b bd e0 00 00 00 <48> 8b 7f 20 48 01 d7
>>> 48 89 7d e8 48 89 e9 48 83 c
>>> 1 d0 48 8b 7d c8
>>> [    1.090635] RSP: 0018:ffffc90000197dc8 EFLAGS: 00010282
>>> [    1.091100] RAX: 0000000000000000 RBX: ffff888005a60458 RCX:
>>> 0000000000000024
>>> [    1.091727] RDX: 00000000000002f0 RSI: 0000000000000509 RDI:
>>> 0000000000000000
>>> [    1.092384] RBP: ffffc90000197e20 R08: 0000000000000001 R09:
>>> 0000000000000000
>>> [    1.093014] R10: 0000000000000002 R11: 0000000000000000 R12:
>>> 0000000000020000
>>> [    1.093660] R13: ffff888006199800 R14: ffff88800474c480 R15:
>>> ffff888005a60458
>>> [    1.094314]  ? bpf_prog_0a182df2d34af188_dump_bpf_prog+0xc8/0xbc8
>>> [    1.094871]  bpf_iter_run_prog+0x75/0x160
>>> [    1.095231]  __bpf_prog_seq_show+0x39/0x40
>>> [    1.095602]  bpf_seq_read+0xf6/0x3d0
>>> [    1.095915]  vfs_read+0xa3/0x1b0
>>> [    1.096226]  ksys_read+0x4f/0xc0
>>> [    1.096527]  do_syscall_64+0x2d/0x40
>>> [    1.096831]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>> [    1.097287] RIP: 0033:0x7f13a43e3ec2
>>> [    1.097625] Code: c0 e9 b2 fe ff ff 50 48 8d 3d aa 36 0a 00 e8 65 eb
>>> 01 00 0f 1f 44 00 00 f3 0f
>>> 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77
>>> 56 c3 0f 1f 44 00 00 48 83 e
>>> c 28 48 89 54 24
>>> [    1.099232] RSP: 002b:00007fffed256bb8 EFLAGS: 00000246 ORIG_RAX:
>>> 0000000000000000
>>> [    1.099922] RAX: ffffffffffffffda RBX: 0000000000020000 RCX:
>>> 00007f13a43e3ec2
>>> [    1.100576] RDX: 0000000000020000 RSI: 00007f13a42d0000 RDI:
>>> 0000000000000003
>>> [    1.101197] RBP: 00007f13a42d0000 R08: 00007f13a42cf010 R09:
>>> 0000000000000000
>>> [    1.101868] R10: 0000000000000022 R11: 0000000000000246 R12:
>>> 0000561599794c00
>>> [    1.102486] R13: 0000000000000003 R14: 0000000000020000 R15:
>>> 0000000000020000
>>>
>>> Note that above `cat` is called during /sbin/init init process.
>>> ......
>>> [    0.964879] Run /sbin/init as init process
>>> starting pid 84, tty '': '/etc/init.d/rcS'
>>> ......
>>>
>>> I checked the assembly code and the above error info and the reason
>>> is due to an exception (address 0) happens in bpf_prog iterator.
>>>
>>> SEC("iter/bpf_prog")
>>> int dump_bpf_prog(struct bpf_iter__bpf_prog *ctx)
>>> {
>>>           struct seq_file *seq = ctx->meta->seq;
>>>           __u64 seq_num = ctx->meta->seq_num;
>>>           struct bpf_prog *prog = ctx->prog;
>>>           struct bpf_prog_aux *aux;
>>>
>>>           if (!prog)
>>>                   return 0;
>>>
>>>           aux = prog->aux;
>>>           if (seq_num == 0)
>>>                   BPF_SEQ_PRINTF(seq, "  id name             attached\n");
>>>
>>>           BPF_SEQ_PRINTF(seq, "%4u %-16s %s %s\n", aux->id,
>>>                          get_name(aux->btf, aux->func_info[0].type_id,
>>> aux->name),
>>>                          aux->attach_func_name, aux->dst_prog->aux->name);
>>>           return 0;
>>> }
>>>
>>> In the above, aux->dst_prog == 0 and exception does not get caught
>>> properly and kernel complains. This might be due to
>>> ths `cat /sys/fs/bpf/progs.debug` is called too early (in init process)
>>> and something is not set up properly yet.
>>>
>>> In a different rootfs, I called `cat /sys/fs/bpf/progs.debug` after
>>> login prompt, and I did not see the error.
>>>
>>> If somebody knows what is the possible reason, that will be great.
>>> Otherwise, I will continue to debug this later.
>>
>> I did some investigation and found the root cause.
>>
>> In arch/x86/mm/fault.c, function do_user_addr_fault(),
>>
>> The following if condition is false when /sys/fs/bpf/progs.debug is
>> run during init time and is true when it is run after login prompt.
>>
>>           if (unlikely(cpu_feature_enabled(X86_FEATURE_SMAP) &&
>>                        !(hw_error_code & X86_PF_USER) &&
>>                        !(regs->flags & X86_EFLAGS_AC)))
>>           {
>>                   bad_area_nosemaphore(regs, hw_error_code, address);
>>                   return;
>>           }
>>
>> Specifically, cpu_feature_enabled(X86_FEATURE_SMAP) is false when bpf
>> program is run at /sbin/init time and is true after login prompt.
>>
>> The false condition eventually leads the control to the following
>> code in do_user_addr_fault().
>>
>>           if (unlikely(!mmap_read_trylock(mm))) {
>>                   if (!user_mode(regs) &&
>> !search_exception_tables(regs->ip)) {
>>                           /*
>>                            * Fault from code in kernel from
>>                            * which we do not expect faults.
>>                            */
>>                           bad_area_nosemaphore(regs, hw_error_code, address);
>>                           return;
>>                   }
>> retry:
>>                   mmap_read_lock(mm);
>>           } else {
>>                   /*
>>                    * The above down_read_trylock() might have succeeded in
>>                    * which case we'll have missed the might_sleep() from
>>                    * down_read():
>>                    */
>>                   might_sleep();
>>           }
>>
>> and since mmap_read_trylock(mm) is successful with return value 1,
>> might_sleep() is called and hence the warning.
> 
> Do you think this needs to be worked around in the script? If so, I would
> prefer to do it in a separate patch so that we capture all the details.

I do not know how to fix it right now and am doing some further 
investigation. Agree that this should not block your current patch.

> 
> - KP
> 
>>
>>>
> 
> [...]
> 
>>>> +}
>>>> +
>>> [...]
Jiri Olsa Jan. 25, 2021, 10:19 p.m. UTC | #6
On Sat, Jan 23, 2021 at 12:44:44AM +0000, KP Singh wrote:
> The script runs the BPF selftests locally on the same kernel image
> as they would run post submit in the BPF continuous integration
> framework.
> 
> The goal of the script is to allow contributors to run selftests locally
> in the same environment to check if their changes would end up breaking
> the BPF CI and reduce the back-and-forth between the maintainers and the
> developers.
> 
> Signed-off-by: KP Singh <kpsingh@kernel.org>

great! thanks for this

Tested-by: Jiri Olsa <jolsa@redhat.com>

jirka

> ---
>  tools/testing/selftests/bpf/run_in_vm.sh | 353 +++++++++++++++++++++++
>  1 file changed, 353 insertions(+)
>  create mode 100755 tools/testing/selftests/bpf/run_in_vm.sh
> 
> diff --git a/tools/testing/selftests/bpf/run_in_vm.sh b/tools/testing/selftests/bpf/run_in_vm.sh
> new file mode 100755
> index 000000000000..09bb9705acb3
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/run_in_vm.sh
> @@ -0,0 +1,353 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +
> +set -u
> +set -e
> +
> +QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}"
> +X86_BZIMAGE="arch/x86/boot/bzImage"
> +DEFAULT_COMMAND="./test_progs"
> +MOUNT_DIR="mnt"
> +ROOTFS_IMAGE="root.img"
> +OUTPUT_DIR="$HOME/.bpf_selftests"
> +KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config"
> +INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
> +NUM_COMPILE_JOBS="$(nproc)"
> +
> +usage()
> +{
> +	cat <<EOF
> +Usage: $0 [-k] [-i] [-d <output_dir>] -- [<command>]
> +
> +<command> is the command you would normally run when you are in
> +tools/testing/selftests/bpf. e.g:
> +
> +	$0 -- ./test_progs -t test_lsm
> +
> +If no command is specified, "${DEFAULT_COMMAND}" will be run by
> +default.
> +
> +If you build your kernel using KBUILD_OUTPUT= or O= options, these
> +can be passed as environment variables to the script:
> +
> +  O=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
> +
> +or
> +
> +  KBUILD_OUTPUT=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
> +
> +Options:
> +
> +	-k)		"Keep the kernel", i.e. don't recompile the kernel if it exists.
> +	-i)		Update the rootfs image with a newer version.
> +	-d)		Update the output directory (default: ${OUTPUT_DIR})
> +	-j)		Number of jobs for compilation, similar to -j in make
> +			(default: ${NUM_COMPILE_JOBS})
> +EOF
> +}
> +
> +unset URLS
> +populate_url_map()
> +{
> +	if ! declare -p URLS &> /dev/null; then
> +		# URLS contain the mapping from file names to URLs where
> +		# those files can be downloaded from.
> +		declare -gA URLS
> +		while IFS=$'\t' read -r name url; do
> +			URLS["$name"]="$url"
> +		done < <(curl -Lsf ${INDEX_URL})
> +	fi
> +	echo "${URLS[*]}"
> +}
> +
> +download()
> +{
> +	local file="$1"
> +
> +	if [[ ! -v URLS[$file] ]]; then
> +		echo "$file not found" >&2
> +		return 1
> +	fi
> +
> +	echo "Downloading $file..." >&2
> +	curl -Lf "${URLS[$file]}" "${@:2}"
> +}
> +
> +newest_rootfs_version()
> +{
> +	{
> +	for file in "${!URLS[@]}"; do
> +		if [[ $file =~ ^libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then
> +			echo "${BASH_REMATCH[1]}"
> +		fi
> +	done
> +	} | sort -rV | head -1
> +}
> +
> +download_rootfs()
> +{
> +	local rootfsversion="$1"
> +	local dir="$2"
> +
> +	if ! which zstd &> /dev/null; then
> +		echo 'Could not find "zstd" on the system, please install zstd'
> +		exit 1
> +	fi
> +
> +	download "libbpf-vmtest-rootfs-$rootfsversion.tar.zst" |
> +		zstd -d | sudo tar -C "$dir" -x
> +}
> +
> +recompile_kernel()
> +{
> +	local kernel_checkout="$1"
> +	local make_command="$2"
> +
> +	cd "${kernel_checkout}"
> +
> +	${make_command} olddefconfig
> +	${make_command}
> +}
> +
> +mount_image()
> +{
> +	local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}"
> +	local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
> +
> +	sudo mount -o loop "${rootfs_img}" "${mount_dir}"
> +}
> +
> +unmount_image()
> +{
> +	local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
> +
> +	sudo umount "${mount_dir}" &> /dev/null
> +}
> +
> +update_selftests()
> +{
> +	local kernel_checkout="$1"
> +	local selftests_dir="${kernel_checkout}/tools/testing/selftests/bpf"
> +
> +	cd "${selftests_dir}"
> +	${make_command}
> +
> +	# Mount the image and copy the selftests to the image.
> +	mount_image
> +	sudo rm -rf "${mount_dir}/root/bpf"
> +	sudo cp -r "${selftests_dir}" "${mount_dir}/root"
> +	unmount_image
> +}
> +
> +update_init_script()
> +{
> +	local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d"
> +	local init_script="${init_script_dir}/S50-startup"
> +	local command="$1"
> +	local log_file="$2"
> +
> +	mount_image
> +
> +	if [[ ! -d "${init_script_dir}" ]]; then
> +		cat <<EOF
> +Could not find ${init_script_dir} in the mounted image.
> +This likely indicates a bad rootfs image, Please download
> +a new image by passing "-i" to the script
> +EOF
> +		exit 1
> +
> +	fi
> +
> +	cat <<EOF | sudo tee "${init_script}"
> +#!/bin/bash
> +
> +{
> +
> +	cd /root/bpf
> +	echo ${command}
> +	${command}
> +} 2>&1 | tee /root/${log_file}
> +poweroff -f
> +EOF
> +
> +	sudo chmod a+x "${init_script}"
> +	unmount_image
> +}
> +
> +create_vm_image()
> +{
> +	local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}"
> +	local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
> +
> +	rm -rf "${rootfs_img}"
> +	touch "${rootfs_img}"
> +	chattr +C "${rootfs_img}" >/dev/null 2>&1 || true
> +
> +	truncate -s 2G "${rootfs_img}"
> +	mkfs.ext4 -q "${rootfs_img}"
> +
> +	mount_image
> +	download_rootfs "$(newest_rootfs_version)" "${mount_dir}"
> +	unmount_image
> +}
> +
> +run_vm()
> +{
> +	local kernel_bzimage="$1"
> +	local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}"
> +
> +	if ! which "${QEMU_BINARY}" &> /dev/null; then
> +		cat <<EOF
> +Could not find ${QEMU_BINARY}
> +Please install qemu or set the QEMU_BINARY environment variable.
> +EOF
> +		exit 1
> +	fi
> +
> +	${QEMU_BINARY} \
> +		-nodefaults \
> +		-display none \
> +		-serial mon:stdio \
> +		-cpu kvm64 \
> +		-enable-kvm \
> +		-smp 4 \
> +		-m 2G \
> +		-drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \
> +		-kernel "${kernel_bzimage}" \
> +		-append "root=/dev/vda rw console=ttyS0,115200"
> +}
> +
> +copy_logs()
> +{
> +	local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
> +	local log_file="${mount_dir}/root/$1"
> +
> +	mount_image
> +	sudo cp ${log_file} "${OUTPUT_DIR}"
> +	sudo rm -f ${log_file}
> +	unmount_image
> +}
> +
> +is_rel_path()
> +{
> +	local path="$1"
> +
> +	[[ ${path:0:1} != "/" ]]
> +}
> +
> +main()
> +{
> +	local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
> +	local kernel_checkout=$(realpath "${script_dir}"/../../../../)
> +	local log_file="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S.log")"
> +	# By default the script searches for the kernel in the checkout directory but
> +	# it also obeys environment variables O= and KBUILD_OUTPUT=
> +	local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}"
> +	local command="${DEFAULT_COMMAND}"
> +	local kernel_recompile="yes"
> +	local update_image="no"
> +
> +	while getopts 'hkid:j:' opt; do
> +		case ${opt} in
> +		k)
> +			kernel_recompile="no"
> +			;;
> +		i)
> +			update_image="yes"
> +			;;
> +		d)
> +			OUTPUT_DIR="$OPTARG"
> +			;;
> +		j)
> +			NUM_COMPILE_JOBS="$OPTARG"
> +			;;
> +		h)
> +			usage
> +			exit 0
> +			;;
> +		\? )
> +			echo "Invalid Option: -$OPTARG"
> +			usage
> +			exit 1
> +			;;
> +      		: )
> +        		echo "Invalid Option: -$OPTARG requires an argument"
> +			usage
> +			exit 1
> +			;;
> +		esac
> +	done
> +	shift $((OPTIND -1))
> +
> +	if [[ $# -eq 0 ]]; then
> +		echo "No command specified, will run ${DEFAULT_COMMAND} in the vm"
> +	else
> +		command="$@"
> +	fi
> +
> +	local kconfig_file="${OUTPUT_DIR}/latest.config"
> +	local make_command="make -j ${NUM_COMPILE_JOBS} KCONFIG_CONFIG=${kconfig_file}"
> +
> +	# Figure out where the kernel is being built.
> +	# O takes precedence over KBUILD_OUTPUT.
> +	if [[ "${O:=""}" != "" ]]; then
> +		if is_rel_path "${O}"; then
> +			O="$(realpath "${PWD}/${O}")"
> +		fi
> +		kernel_bzimage="${O}/${X86_BZIMAGE}"
> +		make_command="${make_command} O=${O}"
> +	elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then
> +		if is_rel_path "${KBUILD_OUTPUT}"; then
> +			KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")"
> +		fi
> +		kernel_bzimage="${KBUILD_OUTPUT}/${X86_BZIMAGE}"
> +		make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}"
> +	fi
> +
> +	populate_url_map
> +
> +	local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}"
> +	local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
> +
> +	echo "Output directory: ${OUTPUT_DIR}"
> +
> +	mkdir -p "${OUTPUT_DIR}"
> +	mkdir -p "${mount_dir}"
> +	curl -Lf "${KCONFIG_URL}" -o "${kconfig_file}"
> +
> +	if [[ "${kernel_recompile}" == "no" && ! -f "${kernel_bzimage}" ]]; then
> +		echo "Kernel image not found in ${kernel_bzimage}, kernel will be recompiled"
> +		kernel_recompile="yes"
> +	fi
> +
> +	if [[ "${kernel_recompile}" == "yes" ]]; then
> +		recompile_kernel "${kernel_checkout}" "${make_command}"
> +	fi
> +
> +	if [[ "${update_image}" == "no" && ! -f "${rootfs_img}" ]]; then
> +		echo "rootfs image not found in ${rootfs_img}"
> +		update_image="yes"
> +	fi
> +
> +	if [[ "${update_image}" == "yes" ]]; then
> +		create_vm_image
> +	fi
> +
> +	update_selftests "${kernel_checkout}" "${make_command}"
> +	update_init_script "${command}" "${log_file}"
> +	run_vm "${kernel_bzimage}"
> +	copy_logs "${log_file}"
> +	echo "Logs saved in ${OUTPUT_DIR}/${log_file}"
> +}
> +
> +catch()
> +{
> +	local exit_code=$1
> +
> +	unmount_image
> +	exit ${exit_code}
> +}
> +
> +trap 'catch "$?"' EXIT
> +
> +main "$@"
> -- 
> 2.30.0.280.ga3ce27912f-goog
>
Andrii Nakryiko Jan. 26, 2021, 2:09 a.m. UTC | #7
On Fri, Jan 22, 2021 at 4:44 PM KP Singh <kpsingh@kernel.org> wrote:
>
> The script runs the BPF selftests locally on the same kernel image
> as they would run post submit in the BPF continuous integration
> framework.
>
> The goal of the script is to allow contributors to run selftests locally
> in the same environment to check if their changes would end up breaking
> the BPF CI and reduce the back-and-forth between the maintainers and the
> developers.
>
> Signed-off-by: KP Singh <kpsingh@kernel.org>
> ---

This is great, thanks a lot for working on this! This is great
especially for ad-hoc contributors who don't have qemu workflow setup.
Below are some comments for the extra polish :)

1. There is this long list output at the beginning:

https://libbpf-vmtest.s3-us-west-1.amazonaws.com/x86_64/vmlinux-5.5.0.zst
https://libbpf-vmtest....

Can we omit that?

2. Then something is re-downloaded every single time:

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 77713  100 77713    0     0   509k      0 --:--:-- --:--:-- --:--:--  512k

Unless it's to check if something newer appeared in S3, would be nice
to skip that step.

3. Every single time I run the script it actually rebuilds kernel.
Somehow Linux Makefile's logic to do nothing if nothing changed in
Linux source code doesn't kick in, I wonder why? It's quite annoying
and time-consuming for frequent selftest reruns. What's weird is that
individual .o's are not re-built, but kernel is still re-linked and
BTF is re-generated, which is the slow part :(

4. Selftests are re-built from scratch every single time, even if
nothing changed. Again, strange because they won't do it normally. And
given there is a fixed re-usable .bpf_selftests "cache directory", we
should be able to set everything up so that no extra compilation is
performed, no?

5. Before VM is started there is:


#!/bin/bash

{

        cd /root/bpf
        echo ./test_progs
        ./test_progs
} 2>&1 | tee /root/bpf_selftests.2021-01-25_17-56-11.log
poweroff -f


Which is probably useful in rare cases for debugging purposes, but is
just distracting in common case. Would it be able to have verbose flag
for your script that would omit output like this by default?

6. Was too lazy to check, but once VM boots and before specified
command is run, there is a bunch of verbose script echoing:

+ for path in /etc/rcS.d/S*

If that's part of libbpf CI's image, let's fix it there. If not, let's
fix it in your script?

7. Is it just me, or when ./test_progs is run inside VM, it's output
is somehow heavily buffered and delayed? I get no output for a while,
and then a whole bunch of lines with already passed tests.  Curious if
anyone else noticed that as well. When I run the same image locally
and manually (not through your script), I don't have this issue.

8. I noticed that even if the command succeeds (e.g., ./test_progs in
my case), the script exits with non-zero error code (32 in my case).
That's suboptimal, because you can't use that script to detect test
failures.

But again, it's the polish feedback, great work!

>  tools/testing/selftests/bpf/run_in_vm.sh | 353 +++++++++++++++++++++++
>  1 file changed, 353 insertions(+)
>  create mode 100755 tools/testing/selftests/bpf/run_in_vm.sh
>
> diff --git a/tools/testing/selftests/bpf/run_in_vm.sh b/tools/testing/selftests/bpf/run_in_vm.sh
> new file mode 100755
> index 000000000000..09bb9705acb3
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/run_in_vm.sh
> @@ -0,0 +1,353 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +
> +set -u
> +set -e
> +
> +QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}"
> +X86_BZIMAGE="arch/x86/boot/bzImage"

Might be worth it to mention that this only works with x86_64 (due to
image restrictions at least, right?).

> +DEFAULT_COMMAND="./test_progs"
> +MOUNT_DIR="mnt"
> +ROOTFS_IMAGE="root.img"
> +OUTPUT_DIR="$HOME/.bpf_selftests"
> +KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config"
> +INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
> +NUM_COMPILE_JOBS="$(nproc)"
> +
> +usage()
> +{
> +       cat <<EOF
> +Usage: $0 [-k] [-i] [-d <output_dir>] -- [<command>]
> +
> +<command> is the command you would normally run when you are in
> +tools/testing/selftests/bpf. e.g:
> +
> +       $0 -- ./test_progs -t test_lsm
> +
> +If no command is specified, "${DEFAULT_COMMAND}" will be run by
> +default.
> +
> +If you build your kernel using KBUILD_OUTPUT= or O= options, these
> +can be passed as environment variables to the script:
> +
> +  O=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm

"relative_to_cwd" is a bit misleading, it could be an absolute path as
well, I presume. So I'd just say "O=<kernel_build_path>" or something
along those lines.

> +
> +or
> +
> +  KBUILD_OUTPUT=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
> +

[...]
Jiri Olsa Jan. 26, 2021, 7:51 p.m. UTC | #8
On Mon, Jan 25, 2021 at 06:09:48PM -0800, Andrii Nakryiko wrote:

SNIP

> 
> 7. Is it just me, or when ./test_progs is run inside VM, it's output
> is somehow heavily buffered and delayed? I get no output for a while,
> and then a whole bunch of lines with already passed tests.  Curious if
> anyone else noticed that as well. When I run the same image locally
> and manually (not through your script), I don't have this issue.

I see the same thing, big delay on the test_prog start and then
chunk updates of its output

jirka

> 
> 8. I noticed that even if the command succeeds (e.g., ./test_progs in
> my case), the script exits with non-zero error code (32 in my case).
> That's suboptimal, because you can't use that script to detect test
> failures.
> 
> But again, it's the polish feedback, great work!
> 
> >  tools/testing/selftests/bpf/run_in_vm.sh | 353 +++++++++++++++++++++++
> >  1 file changed, 353 insertions(+)
> >  create mode 100755 tools/testing/selftests/bpf/run_in_vm.sh
> >
> > diff --git a/tools/testing/selftests/bpf/run_in_vm.sh b/tools/testing/selftests/bpf/run_in_vm.sh
> > new file mode 100755
> > index 000000000000..09bb9705acb3
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/run_in_vm.sh
> > @@ -0,0 +1,353 @@
> > +#!/bin/bash
> > +# SPDX-License-Identifier: GPL-2.0
> > +
> > +set -u
> > +set -e
> > +
> > +QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}"
> > +X86_BZIMAGE="arch/x86/boot/bzImage"
> 
> Might be worth it to mention that this only works with x86_64 (due to
> image restrictions at least, right?).
> 
> > +DEFAULT_COMMAND="./test_progs"
> > +MOUNT_DIR="mnt"
> > +ROOTFS_IMAGE="root.img"
> > +OUTPUT_DIR="$HOME/.bpf_selftests"
> > +KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config"
> > +INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
> > +NUM_COMPILE_JOBS="$(nproc)"
> > +
> > +usage()
> > +{
> > +       cat <<EOF
> > +Usage: $0 [-k] [-i] [-d <output_dir>] -- [<command>]
> > +
> > +<command> is the command you would normally run when you are in
> > +tools/testing/selftests/bpf. e.g:
> > +
> > +       $0 -- ./test_progs -t test_lsm
> > +
> > +If no command is specified, "${DEFAULT_COMMAND}" will be run by
> > +default.
> > +
> > +If you build your kernel using KBUILD_OUTPUT= or O= options, these
> > +can be passed as environment variables to the script:
> > +
> > +  O=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
> 
> "relative_to_cwd" is a bit misleading, it could be an absolute path as
> well, I presume. So I'd just say "O=<kernel_build_path>" or something
> along those lines.
> 
> > +
> > +or
> > +
> > +  KBUILD_OUTPUT=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
> > +
> 
> [...]
>
KP Singh Feb. 1, 2021, 12:22 a.m. UTC | #9
On Tue, Jan 26, 2021 at 3:10 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Fri, Jan 22, 2021 at 4:44 PM KP Singh <kpsingh@kernel.org> wrote:
> >
> > The script runs the BPF selftests locally on the same kernel image
> > as they would run post submit in the BPF continuous integration
> > framework.
> >
> > The goal of the script is to allow contributors to run selftests locally
> > in the same environment to check if their changes would end up breaking
> > the BPF CI and reduce the back-and-forth between the maintainers and the
> > developers.
> >
> > Signed-off-by: KP Singh <kpsingh@kernel.org>
> > ---
>
> This is great, thanks a lot for working on this! This is great
> especially for ad-hoc contributors who don't have qemu workflow setup.
> Below are some comments for the extra polish :)
>
> 1. There is this long list output at the beginning:
>
> https://libbpf-vmtest.s3-us-west-1.amazonaws.com/x86_64/vmlinux-5.5.0.zst
> https://libbpf-vmtest....
>
> Can we omit that?

Sure.

>
> 2. Then something is re-downloaded every single time:
>
>   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
>                                  Dload  Upload   Total   Spent    Left  Speed
> 100 77713  100 77713    0     0   509k      0 --:--:-- --:--:-- --:--:--  512k
>
> Unless it's to check if something newer appeared in S3, would be nice
> to skip that step.

This is the kernel config. I wonder how we could check if there is something
new without downloading it, the file is called "latest.config".

Maybe this is something we can add to the URL index as well in format similar
 to the image. But since it's just a config file I am not sure
it's worth the extra effort.

>
> 3. Every single time I run the script it actually rebuilds kernel.
> Somehow Linux Makefile's logic to do nothing if nothing changed in
> Linux source code doesn't kick in, I wonder why? It's quite annoying
> and time-consuming for frequent selftest reruns. What's weird is that
> individual .o's are not re-built, but kernel is still re-linked and
> BTF is re-generated, which is the slow part :(

I changed this from not compiling the kernel by default, to compiling it and you
can "keep your old kernel" with -k. This is because users may run the script,
not compile the kernel and run into issues with the image not being able to
mount as the kernel does not have the right config.

The -k is for people who know what they are doing :)

so you can always run

 ./bpf_presubmit.sh -k

after you have the kernel built once.

>
> 4. Selftests are re-built from scratch every single time, even if
> nothing changed. Again, strange because they won't do it normally. And
> given there is a fixed re-usable .bpf_selftests "cache directory", we
> should be able to set everything up so that no extra compilation is
> performed, no?
>
> 5. Before VM is started there is:
>
>
> #!/bin/bash
>
> {
>
>         cd /root/bpf
>         echo ./test_progs
>         ./test_progs
> } 2>&1 | tee /root/bpf_selftests.2021-01-25_17-56-11.log
> poweroff -f
>
>
> Which is probably useful in rare cases for debugging purposes, but is
> just distracting in common case. Would it be able to have verbose flag
> for your script that would omit output like this by default?

Sure. I can omit it for now and submit a subsequent patch that adds verbosity.

>
> 6. Was too lazy to check, but once VM boots and before specified
> command is run, there is a bunch of verbose script echoing:
>
> + for path in /etc/rcS.d/S*
>
> If that's part of libbpf CI's image, let's fix it there. If not, let's
> fix it in your script?

Nope, this is not from my script so probably something from one of the
CI init scripts.

>
> 7. Is it just me, or when ./test_progs is run inside VM, it's output
> is somehow heavily buffered and delayed? I get no output for a while,
> and then a whole bunch of lines with already passed tests.  Curious if
> anyone else noticed that as well. When I run the same image locally
> and manually (not through your script), I don't have this issue.

I saw this as well but sort of ignored it as it was random for me, but I did
some digging and found that this could be related to buffering within
test_progs, so I changed the buffering to per-line and now it does not
get stuck and dump its output as you and Jiri noticed.

--- a/tools/testing/selftests/bpf/run_in_vm.sh
+++ b/tools/testing/selftests/bpf/run_in_vm.sh
@@ -165,7 +165,7 @@ EOF

        cd /root/bpf
        echo ${command}
-       ${command}
+       stdbuf -oL -eL ${command}
 } 2>&1 | tee /root/${log_file}

>
> 8. I noticed that even if the command succeeds (e.g., ./test_progs in
> my case), the script exits with non-zero error code (32 in my case).
> That's suboptimal, because you can't use that script to detect test
> failures.

I found this was because if the unmount command
in the cleanup block fails
(when the directory was not mounted or already unmounted)
we would never get to the exit command.

The snippet below fixes this.

@@ -343,8 +343,10 @@ main()
 catch()
 {
        local exit_code=$1
-
-       unmount_image
+       # This is just a cleanup and the directory may
+       # have already been unmounted. So, don't let this
+       # clobber the error code we intend to return.
+       unmount_image || true
        exit ${exit_code}
 }

>
> But again, it's the polish feedback, great work!

Thanks! :)



>
> >  tools/testing/selftests/bpf/run_in_vm.sh | 353 +++++++++++++++++++++++
> >  1 file changed, 353 insertions(+)
> >  create mode 100755 tools/testing/selftests/bpf/run_in_vm.sh
> >
> > diff --git a/tools/testing/selftests/bpf/run_in_vm.sh b/tools/testing/selftests/bpf/run_in_vm.sh
> > new file mode 100755
> > index 000000000000..09bb9705acb3
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/run_in_vm.sh
> > @@ -0,0 +1,353 @@
> > +#!/bin/bash
> > +# SPDX-License-Identifier: GPL-2.0
> > +
> > +set -u
> > +set -e
> > +
> > +QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}"
> > +X86_BZIMAGE="arch/x86/boot/bzImage"
>
> Might be worth it to mention that this only works with x86_64 (due to
> image restrictions at least, right?).
>
> > +DEFAULT_COMMAND="./test_progs"
> > +MOUNT_DIR="mnt"
> > +ROOTFS_IMAGE="root.img"
> > +OUTPUT_DIR="$HOME/.bpf_selftests"
> > +KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config"
> > +INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
> > +NUM_COMPILE_JOBS="$(nproc)"
> > +
> > +usage()
> > +{
> > +       cat <<EOF
> > +Usage: $0 [-k] [-i] [-d <output_dir>] -- [<command>]
> > +
> > +<command> is the command you would normally run when you are in
> > +tools/testing/selftests/bpf. e.g:
> > +
> > +       $0 -- ./test_progs -t test_lsm
> > +
> > +If no command is specified, "${DEFAULT_COMMAND}" will be run by
> > +default.
> > +
> > +If you build your kernel using KBUILD_OUTPUT= or O= options, these
> > +can be passed as environment variables to the script:
> > +
> > +  O=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
>
> "relative_to_cwd" is a bit misleading, it could be an absolute path as
> well, I presume. So I'd just say "O=<kernel_build_path>" or something
> along those lines.
>
> > +
> > +or
> > +
> > +  KBUILD_OUTPUT=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
> > +
>
> [...]
KP Singh Feb. 1, 2021, 12:40 a.m. UTC | #10
On Tue, Jan 26, 2021 at 3:10 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Fri, Jan 22, 2021 at 4:44 PM KP Singh <kpsingh@kernel.org> wrote:
> >
> > The script runs the BPF selftests locally on the same kernel image
> > as they would run post submit in the BPF continuous integration
> > framework.
> >
> > The goal of the script is to allow contributors to run selftests locally
> > in the same environment to check if their changes would end up breaking
> > the BPF CI and reduce the back-and-forth between the maintainers and the
> > developers.
> >
> > Signed-off-by: KP Singh <kpsingh@kernel.org>
> > ---
>
> This is great, thanks a lot for working on this! This is great
> especially for ad-hoc contributors who don't have qemu workflow setup.
> Below are some comments for the extra polish :)
>
> 1. There is this long list output at the beginning:
>
> https://libbpf-vmtest.s3-us-west-1.amazonaws.com/x86_64/vmlinux-5.5.0.zst
> https://libbpf-vmtest....
>
> Can we omit that?
>
> 2. Then something is re-downloaded every single time:
>
>   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
>                                  Dload  Upload   Total   Spent    Left  Speed
> 100 77713  100 77713    0     0   509k      0 --:--:-- --:--:-- --:--:--  512k
>
> Unless it's to check if something newer appeared in S3, would be nice
> to skip that step.
>
> 3. Every single time I run the script it actually rebuilds kernel.
> Somehow Linux Makefile's logic to do nothing if nothing changed in
> Linux source code doesn't kick in, I wonder why? It's quite annoying
> and time-consuming for frequent selftest reruns. What's weird is that
> individual .o's are not re-built, but kernel is still re-linked and
> BTF is re-generated, which is the slow part :(
>
> 4. Selftests are re-built from scratch every single time, even if
> nothing changed. Again, strange because they won't do it normally. And
> given there is a fixed re-usable .bpf_selftests "cache directory", we
> should be able to set everything up so that no extra compilation is
> performed, no?
>
> 5. Before VM is started there is:
>
>
> #!/bin/bash
>
> {
>
>         cd /root/bpf
>         echo ./test_progs
>         ./test_progs
> } 2>&1 | tee /root/bpf_selftests.2021-01-25_17-56-11.log
> poweroff -f
>
>
> Which is probably useful in rare cases for debugging purposes, but is
> just distracting in common case. Would it be able to have verbose flag
> for your script that would omit output like this by default?
>
> 6. Was too lazy to check, but once VM boots and before specified
> command is run, there is a bunch of verbose script echoing:
>
> + for path in /etc/rcS.d/S*
>
> If that's part of libbpf CI's image, let's fix it there. If not, let's
> fix it in your script?
>
> 7. Is it just me, or when ./test_progs is run inside VM, it's output
> is somehow heavily buffered and delayed? I get no output for a while,
> and then a whole bunch of lines with already passed tests.  Curious if
> anyone else noticed that as well. When I run the same image locally
> and manually (not through your script), I don't have this issue.
>
> 8. I noticed that even if the command succeeds (e.g., ./test_progs in
> my case), the script exits with non-zero error code (32 in my case).
> That's suboptimal, because you can't use that script to detect test
> failures.
>
> But again, it's the polish feedback, great work!
>
> >  tools/testing/selftests/bpf/run_in_vm.sh | 353 +++++++++++++++++++++++
> >  1 file changed, 353 insertions(+)
> >  create mode 100755 tools/testing/selftests/bpf/run_in_vm.sh
> >
> > diff --git a/tools/testing/selftests/bpf/run_in_vm.sh b/tools/testing/selftests/bpf/run_in_vm.sh
> > new file mode 100755
> > index 000000000000..09bb9705acb3
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/run_in_vm.sh
> > @@ -0,0 +1,353 @@
> > +#!/bin/bash
> > +# SPDX-License-Identifier: GPL-2.0
> > +
> > +set -u
> > +set -e
> > +
> > +QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}"
> > +X86_BZIMAGE="arch/x86/boot/bzImage"
>
> Might be worth it to mention that this only works with x86_64 (due to
> image restrictions at least, right?).

Thanks, I added a comment here. I really hope someone can contribute
support for other architectures too :)

>
> > +DEFAULT_COMMAND="./test_progs"
> > +MOUNT_DIR="mnt"
> > +ROOTFS_IMAGE="root.img"
> > +OUTPUT_DIR="$HOME/.bpf_selftests"
> > +KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config"
> > +INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
> > +NUM_COMPILE_JOBS="$(nproc)"
> > +
> > +usage()
> > +{
> > +       cat <<EOF
> > +Usage: $0 [-k] [-i] [-d <output_dir>] -- [<command>]
> > +
> > +<command> is the command you would normally run when you are in
> > +tools/testing/selftests/bpf. e.g:
> > +
> > +       $0 -- ./test_progs -t test_lsm
> > +
> > +If no command is specified, "${DEFAULT_COMMAND}" will be run by
> > +default.
> > +
> > +If you build your kernel using KBUILD_OUTPUT= or O= options, these
> > +can be passed as environment variables to the script:
> > +
> > +  O=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
>
> "relative_to_cwd" is a bit misleading, it could be an absolute path as
> well, I presume. So I'd just say "O=<kernel_build_path>" or something
> along those lines.

Sorry, I missed this in my other reply, updated.

>
> > +
> > +or
> > +
> > +  KBUILD_OUTPUT=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
> > +
>
> [...]
Andrii Nakryiko Feb. 2, 2021, 6:25 a.m. UTC | #11
On Sun, Jan 31, 2021 at 4:22 PM KP Singh <kpsingh@kernel.org> wrote:
>
> On Tue, Jan 26, 2021 at 3:10 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Fri, Jan 22, 2021 at 4:44 PM KP Singh <kpsingh@kernel.org> wrote:
> > >
> > > The script runs the BPF selftests locally on the same kernel image
> > > as they would run post submit in the BPF continuous integration
> > > framework.
> > >
> > > The goal of the script is to allow contributors to run selftests locally
> > > in the same environment to check if their changes would end up breaking
> > > the BPF CI and reduce the back-and-forth between the maintainers and the
> > > developers.
> > >
> > > Signed-off-by: KP Singh <kpsingh@kernel.org>
> > > ---
> >
> > This is great, thanks a lot for working on this! This is great
> > especially for ad-hoc contributors who don't have qemu workflow setup.
> > Below are some comments for the extra polish :)
> >
> > 1. There is this long list output at the beginning:
> >
> > https://libbpf-vmtest.s3-us-west-1.amazonaws.com/x86_64/vmlinux-5.5.0.zst
> > https://libbpf-vmtest....
> >
> > Can we omit that?
>
> Sure.
>
> >
> > 2. Then something is re-downloaded every single time:
> >
> >   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
> >                                  Dload  Upload   Total   Spent    Left  Speed
> > 100 77713  100 77713    0     0   509k      0 --:--:-- --:--:-- --:--:--  512k
> >
> > Unless it's to check if something newer appeared in S3, would be nice
> > to skip that step.
>
> This is the kernel config. I wonder how we could check if there is something
> new without downloading it, the file is called "latest.config".
>
> Maybe this is something we can add to the URL index as well in format similar
>  to the image. But since it's just a config file I am not sure
> it's worth the extra effort.

Curl supports the following option. Given we have a local cache in
.bpf_selftests, check if it already has .config and pass it as -z
'.bpf_selftests/.config'? Would be nice, if it works out. If not, I
agree, config is small enough to not go to great lengths to avoid
downloading it.

-z/--time-cond <date expression>

(HTTP/FTP) Request a file that has been modified later than the given
time and date, or one that has been modified before that time. The
date expression can be all sorts of date strings or if it doesn't
match any internal ones, it tries to get the time from a given file
name instead.


>
> >
> > 3. Every single time I run the script it actually rebuilds kernel.
> > Somehow Linux Makefile's logic to do nothing if nothing changed in
> > Linux source code doesn't kick in, I wonder why? It's quite annoying
> > and time-consuming for frequent selftest reruns. What's weird is that
> > individual .o's are not re-built, but kernel is still re-linked and
> > BTF is re-generated, which is the slow part :(
>
> I changed this from not compiling the kernel by default, to compiling it and you
> can "keep your old kernel" with -k. This is because users may run the script,
> not compile the kernel and run into issues with the image not being able to
> mount as the kernel does not have the right config.
>
> The -k is for people who know what they are doing :)
>
> so you can always run
>
>  ./bpf_presubmit.sh -k
>
> after you have the kernel built once.

That's not what I'm saying. When running `make` to build Linux, if
won't do much at all if nothing changed. That's a good property that
saves tons of time. I'm saying your script somehow precludes that
behavior and make does tons of unnecessary work. It might be because
of always re-downloaded config, which might make the above (not
redownloading it if it didn't change) more important.

Sure -k might be used this way, but it's expected to happen
automatically. I'm just pointing out that something is not wired
optimally to allow make do its job properly.

>
> >
> > 4. Selftests are re-built from scratch every single time, even if
> > nothing changed. Again, strange because they won't do it normally. And
> > given there is a fixed re-usable .bpf_selftests "cache directory", we
> > should be able to set everything up so that no extra compilation is
> > performed, no?
> >

And this won't be solved with '-k' alone, probably?

> > 5. Before VM is started there is:
> >
> >
> > #!/bin/bash
> >
> > {
> >
> >         cd /root/bpf
> >         echo ./test_progs
> >         ./test_progs
> > } 2>&1 | tee /root/bpf_selftests.2021-01-25_17-56-11.log
> > poweroff -f
> >
> >
> > Which is probably useful in rare cases for debugging purposes, but is
> > just distracting in common case. Would it be able to have verbose flag
> > for your script that would omit output like this by default?
>
> Sure. I can omit it for now and submit a subsequent patch that adds verbosity.

Great.

>
> >
> > 6. Was too lazy to check, but once VM boots and before specified
> > command is run, there is a bunch of verbose script echoing:
> >
> > + for path in /etc/rcS.d/S*
> >
> > If that's part of libbpf CI's image, let's fix it there. If not, let's
> > fix it in your script?
>
> Nope, this is not from my script so probably something from one of the
> CI init scripts.
>

Ok, I'll take a look and see if we can remove this noise.

> >
> > 7. Is it just me, or when ./test_progs is run inside VM, it's output
> > is somehow heavily buffered and delayed? I get no output for a while,
> > and then a whole bunch of lines with already passed tests.  Curious if
> > anyone else noticed that as well. When I run the same image locally
> > and manually (not through your script), I don't have this issue.
>
> I saw this as well but sort of ignored it as it was random for me, but I did
> some digging and found that this could be related to buffering within
> test_progs, so I changed the buffering to per-line and now it does not
> get stuck and dump its output as you and Jiri noticed.
>
> --- a/tools/testing/selftests/bpf/run_in_vm.sh
> +++ b/tools/testing/selftests/bpf/run_in_vm.sh
> @@ -165,7 +165,7 @@ EOF
>
>         cd /root/bpf
>         echo ${command}
> -       ${command}
> +       stdbuf -oL -eL ${command}

makes sense, thanks!

>  } 2>&1 | tee /root/${log_file}
>
> >
> > 8. I noticed that even if the command succeeds (e.g., ./test_progs in
> > my case), the script exits with non-zero error code (32 in my case).
> > That's suboptimal, because you can't use that script to detect test
> > failures.
>
> I found this was because if the unmount command
> in the cleanup block fails
> (when the directory was not mounted or already unmounted)
> we would never get to the exit command.
>
> The snippet below fixes this.
>

awesome, makes this script useful for scripting :)

> @@ -343,8 +343,10 @@ main()
>  catch()
>  {
>         local exit_code=$1
> -
> -       unmount_image
> +       # This is just a cleanup and the directory may
> +       # have already been unmounted. So, don't let this
> +       # clobber the error code we intend to return.
> +       unmount_image || true
>         exit ${exit_code}
>  }
>
> >
> > But again, it's the polish feedback, great work!
>
> Thanks! :)
>
>
>
> >

[...]
KP Singh Feb. 2, 2021, 9:13 p.m. UTC | #12
[...]

> >
> > >
> > > 2. Then something is re-downloaded every single time:
> > >
> > >   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
> > >                                  Dload  Upload   Total   Spent    Left  Speed
> > > 100 77713  100 77713    0     0   509k      0 --:--:-- --:--:-- --:--:--  512k
> > >
> > > Unless it's to check if something newer appeared in S3, would be nice
> > > to skip that step.
> >
> > This is the kernel config. I wonder how we could check if there is something
> > new without downloading it, the file is called "latest.config".
> >
> > Maybe this is something we can add to the URL index as well in format similar
> >  to the image. But since it's just a config file I am not sure
> > it's worth the extra effort.
>
> Curl supports the following option. Given we have a local cache in
> .bpf_selftests, check if it already has .config and pass it as -z
> '.bpf_selftests/.config'? Would be nice, if it works out. If not, I
> agree, config is small enough to not go to great lengths to avoid
> downloading it.
>
> -z/--time-cond <date expression>
>
> (HTTP/FTP) Request a file that has been modified later than the given
> time and date, or one that has been modified before that time. The
> date expression can be all sorts of date strings or if it doesn't
> match any internal ones, it tries to get the time from a given file
> name instead.

This does not work with the github github raw URL so I had to do something like:

diff --git a/tools/testing/selftests/bpf/run_in_vm.sh
b/tools/testing/selftests/bpf/run_in_vm.sh
index 46fbb0422e9e..132017981776 100755
--- a/tools/testing/selftests/bpf/run_in_vm.sh
+++ b/tools/testing/selftests/bpf/run_in_vm.sh
@@ -14,6 +14,7 @@ MOUNT_DIR="mnt"
 ROOTFS_IMAGE="root.img"
 OUTPUT_DIR="$HOME/.bpf_selftests"
 KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config"
+KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config"
 INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
 NUM_COMPILE_JOBS="$(nproc)"

@@ -236,6 +237,27 @@ is_rel_path()
        [[ ${path:0:1} != "/" ]]
 }

+update_kconfig()
+{
+       local kconfig_file="$1"
+       local update_command="curl -sLf ${KCONFIG_URL} -o ${kconfig_file}"
+       # Github does not return the "last-modified" header when
retrieving the raw contents of the file.
+       # Use the API call to get the last-modified time of the kernel
config and only update the config if
+       # it has been updated after the previously cached config was
created. This avoids unnecessarily
+       # compiling the kernel and selftests.
+       if [[ -f "${kconfig_file}" ]]; then
+               local last_modified_date="$(curl -sL -D -
"${KCONFIG_API_URL}" -o /dev/null | grep "last-modified" | awk -F ': '
'{print $2}')"
+               local remote_modified_timestamp="$(date -d
"${last_modified_date}" +"%s")"
+               local local_creation_timestamp="$(stat -c %W "${kconfig_file}")"
+
+               if [[ "${remote_modified_timestamp}" -gt
"${local_creation_timestamp}" ]]; then
+                       ${update_command}
+               fi
+       else
+               ${update_command}
+       fi
+}
+
 main()
 {
        local script_dir="$(cd -P -- "$(dirname --
"${BASH_SOURCE[0]}")" && pwd -P)"
@@ -314,7 +336,7 @@ main()

        mkdir -p "${OUTPUT_DIR}"
        mkdir -p "${mount_dir}"
-       curl -Lsf "${KCONFIG_URL}" -o "${kconfig_file}"
+       update_kconfig "${kconfig_file}"

        if [[ "${kernel_recompile}" == "no" && ! -f "${kernel_bzimage}" ]]; then
                echo "Kernel image not found in ${kernel_bzimage},
kernel will be recompiled"
>
>
> >
> > >
> > > 3. Every single time I run the script it actually rebuilds kernel.
> > > Somehow Linux Makefile's logic to do nothing if nothing changed in
> > > Linux source code doesn't kick in, I wonder why? It's quite annoying
> > > and time-consuming for frequent selftest reruns. What's weird is that
> > > individual .o's are not re-built, but kernel is still re-linked and
> > > BTF is re-generated, which is the slow part :(
> >
> > I changed this from not compiling the kernel by default, to compiling it and you
> > can "keep your old kernel" with -k. This is because users may run the script,
> > not compile the kernel and run into issues with the image not being able to
> > mount as the kernel does not have the right config.
> >
> > The -k is for people who know what they are doing :)
> >
> > so you can always run
> >
> >  ./bpf_presubmit.sh -k
> >
> > after you have the kernel built once.
>
> That's not what I'm saying. When running `make` to build Linux, if
> won't do much at all if nothing changed. That's a good property that
> saves tons of time. I'm saying your script somehow precludes that
> behavior and make does tons of unnecessary work. It might be because
> of always re-downloaded config, which might make the above (not
> redownloading it if it didn't change) more important.
>
> Sure -k might be used this way, but it's expected to happen
> automatically. I'm just pointing out that something is not wired
> optimally to allow make do its job properly.

Ah, now I see what you are saying and yeah, it was indeed the downloading
of the config every time that was causing the kernel and selftest to be
recompiled.

With the change I posted above this does not happen anymore. I guess, with
this we can simply remove the -k option?

>
> >
> > >
> > > 4. Selftests are re-built from scratch every single time, even if
> > > nothing changed. Again, strange because they won't do it normally. And
> > > given there is a fixed re-usable .bpf_selftests "cache directory", we
> > > should be able to set everything up so that no extra compilation is
> > > performed, no?
> > >
>
> And this won't be solved with '-k' alone, probably?

Yeah..

>
> > > 5. Before VM is started there is:
> > >
> > >
> > > #!/bin/bash

[...]
Andrii Nakryiko Feb. 2, 2021, 9:37 p.m. UTC | #13
On Tue, Feb 2, 2021 at 1:13 PM KP Singh <kpsingh@kernel.org> wrote:
>
> [...]
>
> > >
> > > >
> > > > 2. Then something is re-downloaded every single time:
> > > >
> > > >   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
> > > >                                  Dload  Upload   Total   Spent    Left  Speed
> > > > 100 77713  100 77713    0     0   509k      0 --:--:-- --:--:-- --:--:--  512k
> > > >
> > > > Unless it's to check if something newer appeared in S3, would be nice
> > > > to skip that step.
> > >
> > > This is the kernel config. I wonder how we could check if there is something
> > > new without downloading it, the file is called "latest.config".
> > >
> > > Maybe this is something we can add to the URL index as well in format similar
> > >  to the image. But since it's just a config file I am not sure
> > > it's worth the extra effort.
> >
> > Curl supports the following option. Given we have a local cache in
> > .bpf_selftests, check if it already has .config and pass it as -z
> > '.bpf_selftests/.config'? Would be nice, if it works out. If not, I
> > agree, config is small enough to not go to great lengths to avoid
> > downloading it.
> >
> > -z/--time-cond <date expression>
> >
> > (HTTP/FTP) Request a file that has been modified later than the given
> > time and date, or one that has been modified before that time. The
> > date expression can be all sorts of date strings or if it doesn't
> > match any internal ones, it tries to get the time from a given file
> > name instead.
>
> This does not work with the github github raw URL so I had to do something like:
>
> diff --git a/tools/testing/selftests/bpf/run_in_vm.sh
> b/tools/testing/selftests/bpf/run_in_vm.sh
> index 46fbb0422e9e..132017981776 100755
> --- a/tools/testing/selftests/bpf/run_in_vm.sh
> +++ b/tools/testing/selftests/bpf/run_in_vm.sh
> @@ -14,6 +14,7 @@ MOUNT_DIR="mnt"
>  ROOTFS_IMAGE="root.img"
>  OUTPUT_DIR="$HOME/.bpf_selftests"
>  KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config"
> +KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config"
>  INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
>  NUM_COMPILE_JOBS="$(nproc)"
>
> @@ -236,6 +237,27 @@ is_rel_path()
>         [[ ${path:0:1} != "/" ]]
>  }
>
> +update_kconfig()
> +{
> +       local kconfig_file="$1"
> +       local update_command="curl -sLf ${KCONFIG_URL} -o ${kconfig_file}"
> +       # Github does not return the "last-modified" header when
> retrieving the raw contents of the file.
> +       # Use the API call to get the last-modified time of the kernel
> config and only update the config if
> +       # it has been updated after the previously cached config was
> created. This avoids unnecessarily
> +       # compiling the kernel and selftests.
> +       if [[ -f "${kconfig_file}" ]]; then
> +               local last_modified_date="$(curl -sL -D -
> "${KCONFIG_API_URL}" -o /dev/null | grep "last-modified" | awk -F ': '
> '{print $2}')"
> +               local remote_modified_timestamp="$(date -d
> "${last_modified_date}" +"%s")"
> +               local local_creation_timestamp="$(stat -c %W "${kconfig_file}")"
> +
> +               if [[ "${remote_modified_timestamp}" -gt
> "${local_creation_timestamp}" ]]; then
> +                       ${update_command}
> +               fi
> +       else
> +               ${update_command}
> +       fi
> +}
> +
>  main()
>  {
>         local script_dir="$(cd -P -- "$(dirname --
> "${BASH_SOURCE[0]}")" && pwd -P)"
> @@ -314,7 +336,7 @@ main()
>
>         mkdir -p "${OUTPUT_DIR}"
>         mkdir -p "${mount_dir}"
> -       curl -Lsf "${KCONFIG_URL}" -o "${kconfig_file}"
> +       update_kconfig "${kconfig_file}"
>
>         if [[ "${kernel_recompile}" == "no" && ! -f "${kernel_bzimage}" ]]; then
>                 echo "Kernel image not found in ${kernel_bzimage},
> kernel will be recompiled"
> >
> >
> > >
> > > >
> > > > 3. Every single time I run the script it actually rebuilds kernel.
> > > > Somehow Linux Makefile's logic to do nothing if nothing changed in
> > > > Linux source code doesn't kick in, I wonder why? It's quite annoying
> > > > and time-consuming for frequent selftest reruns. What's weird is that
> > > > individual .o's are not re-built, but kernel is still re-linked and
> > > > BTF is re-generated, which is the slow part :(
> > >
> > > I changed this from not compiling the kernel by default, to compiling it and you
> > > can "keep your old kernel" with -k. This is because users may run the script,
> > > not compile the kernel and run into issues with the image not being able to
> > > mount as the kernel does not have the right config.
> > >
> > > The -k is for people who know what they are doing :)
> > >
> > > so you can always run
> > >
> > >  ./bpf_presubmit.sh -k
> > >
> > > after you have the kernel built once.
> >
> > That's not what I'm saying. When running `make` to build Linux, if
> > won't do much at all if nothing changed. That's a good property that
> > saves tons of time. I'm saying your script somehow precludes that
> > behavior and make does tons of unnecessary work. It might be because
> > of always re-downloaded config, which might make the above (not
> > redownloading it if it didn't change) more important.
> >
> > Sure -k might be used this way, but it's expected to happen
> > automatically. I'm just pointing out that something is not wired
> > optimally to allow make do its job properly.
>
> Ah, now I see what you are saying and yeah, it was indeed the downloading
> of the config every time that was causing the kernel and selftest to be
> recompiled.
>
> With the change I posted above this does not happen anymore. I guess, with
> this we can simply remove the -k option?
>

Yeah, I think so. Very cool, with this behavior this script will
probably become a go-to script for everyone doing even occasional BPF
development. Thanks!

> >
> > >
> > > >
> > > > 4. Selftests are re-built from scratch every single time, even if
> > > > nothing changed. Again, strange because they won't do it normally. And
> > > > given there is a fixed re-usable .bpf_selftests "cache directory", we
> > > > should be able to set everything up so that no extra compilation is
> > > > performed, no?
> > > >
> >
> > And this won't be solved with '-k' alone, probably?
>
> Yeah..
>
> >
> > > > 5. Before VM is started there is:
> > > >
> > > >
> > > > #!/bin/bash
>
> [...]
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/run_in_vm.sh b/tools/testing/selftests/bpf/run_in_vm.sh
new file mode 100755
index 000000000000..09bb9705acb3
--- /dev/null
+++ b/tools/testing/selftests/bpf/run_in_vm.sh
@@ -0,0 +1,353 @@ 
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -u
+set -e
+
+QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}"
+X86_BZIMAGE="arch/x86/boot/bzImage"
+DEFAULT_COMMAND="./test_progs"
+MOUNT_DIR="mnt"
+ROOTFS_IMAGE="root.img"
+OUTPUT_DIR="$HOME/.bpf_selftests"
+KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config"
+INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
+NUM_COMPILE_JOBS="$(nproc)"
+
+usage()
+{
+	cat <<EOF
+Usage: $0 [-k] [-i] [-d <output_dir>] -- [<command>]
+
+<command> is the command you would normally run when you are in
+tools/testing/selftests/bpf. e.g:
+
+	$0 -- ./test_progs -t test_lsm
+
+If no command is specified, "${DEFAULT_COMMAND}" will be run by
+default.
+
+If you build your kernel using KBUILD_OUTPUT= or O= options, these
+can be passed as environment variables to the script:
+
+  O=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
+
+or
+
+  KBUILD_OUTPUT=<path_relative_to_cwd> $0 -- ./test_progs -t test_lsm
+
+Options:
+
+	-k)		"Keep the kernel", i.e. don't recompile the kernel if it exists.
+	-i)		Update the rootfs image with a newer version.
+	-d)		Update the output directory (default: ${OUTPUT_DIR})
+	-j)		Number of jobs for compilation, similar to -j in make
+			(default: ${NUM_COMPILE_JOBS})
+EOF
+}
+
+unset URLS
+populate_url_map()
+{
+	if ! declare -p URLS &> /dev/null; then
+		# URLS contain the mapping from file names to URLs where
+		# those files can be downloaded from.
+		declare -gA URLS
+		while IFS=$'\t' read -r name url; do
+			URLS["$name"]="$url"
+		done < <(curl -Lsf ${INDEX_URL})
+	fi
+	echo "${URLS[*]}"
+}
+
+download()
+{
+	local file="$1"
+
+	if [[ ! -v URLS[$file] ]]; then
+		echo "$file not found" >&2
+		return 1
+	fi
+
+	echo "Downloading $file..." >&2
+	curl -Lf "${URLS[$file]}" "${@:2}"
+}
+
+newest_rootfs_version()
+{
+	{
+	for file in "${!URLS[@]}"; do
+		if [[ $file =~ ^libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then
+			echo "${BASH_REMATCH[1]}"
+		fi
+	done
+	} | sort -rV | head -1
+}
+
+download_rootfs()
+{
+	local rootfsversion="$1"
+	local dir="$2"
+
+	if ! which zstd &> /dev/null; then
+		echo 'Could not find "zstd" on the system, please install zstd'
+		exit 1
+	fi
+
+	download "libbpf-vmtest-rootfs-$rootfsversion.tar.zst" |
+		zstd -d | sudo tar -C "$dir" -x
+}
+
+recompile_kernel()
+{
+	local kernel_checkout="$1"
+	local make_command="$2"
+
+	cd "${kernel_checkout}"
+
+	${make_command} olddefconfig
+	${make_command}
+}
+
+mount_image()
+{
+	local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}"
+	local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
+
+	sudo mount -o loop "${rootfs_img}" "${mount_dir}"
+}
+
+unmount_image()
+{
+	local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
+
+	sudo umount "${mount_dir}" &> /dev/null
+}
+
+update_selftests()
+{
+	local kernel_checkout="$1"
+	local selftests_dir="${kernel_checkout}/tools/testing/selftests/bpf"
+
+	cd "${selftests_dir}"
+	${make_command}
+
+	# Mount the image and copy the selftests to the image.
+	mount_image
+	sudo rm -rf "${mount_dir}/root/bpf"
+	sudo cp -r "${selftests_dir}" "${mount_dir}/root"
+	unmount_image
+}
+
+update_init_script()
+{
+	local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d"
+	local init_script="${init_script_dir}/S50-startup"
+	local command="$1"
+	local log_file="$2"
+
+	mount_image
+
+	if [[ ! -d "${init_script_dir}" ]]; then
+		cat <<EOF
+Could not find ${init_script_dir} in the mounted image.
+This likely indicates a bad rootfs image, Please download
+a new image by passing "-i" to the script
+EOF
+		exit 1
+
+	fi
+
+	cat <<EOF | sudo tee "${init_script}"
+#!/bin/bash
+
+{
+
+	cd /root/bpf
+	echo ${command}
+	${command}
+} 2>&1 | tee /root/${log_file}
+poweroff -f
+EOF
+
+	sudo chmod a+x "${init_script}"
+	unmount_image
+}
+
+create_vm_image()
+{
+	local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}"
+	local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
+
+	rm -rf "${rootfs_img}"
+	touch "${rootfs_img}"
+	chattr +C "${rootfs_img}" >/dev/null 2>&1 || true
+
+	truncate -s 2G "${rootfs_img}"
+	mkfs.ext4 -q "${rootfs_img}"
+
+	mount_image
+	download_rootfs "$(newest_rootfs_version)" "${mount_dir}"
+	unmount_image
+}
+
+run_vm()
+{
+	local kernel_bzimage="$1"
+	local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}"
+
+	if ! which "${QEMU_BINARY}" &> /dev/null; then
+		cat <<EOF
+Could not find ${QEMU_BINARY}
+Please install qemu or set the QEMU_BINARY environment variable.
+EOF
+		exit 1
+	fi
+
+	${QEMU_BINARY} \
+		-nodefaults \
+		-display none \
+		-serial mon:stdio \
+		-cpu kvm64 \
+		-enable-kvm \
+		-smp 4 \
+		-m 2G \
+		-drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \
+		-kernel "${kernel_bzimage}" \
+		-append "root=/dev/vda rw console=ttyS0,115200"
+}
+
+copy_logs()
+{
+	local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
+	local log_file="${mount_dir}/root/$1"
+
+	mount_image
+	sudo cp ${log_file} "${OUTPUT_DIR}"
+	sudo rm -f ${log_file}
+	unmount_image
+}
+
+is_rel_path()
+{
+	local path="$1"
+
+	[[ ${path:0:1} != "/" ]]
+}
+
+main()
+{
+	local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+	local kernel_checkout=$(realpath "${script_dir}"/../../../../)
+	local log_file="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S.log")"
+	# By default the script searches for the kernel in the checkout directory but
+	# it also obeys environment variables O= and KBUILD_OUTPUT=
+	local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}"
+	local command="${DEFAULT_COMMAND}"
+	local kernel_recompile="yes"
+	local update_image="no"
+
+	while getopts 'hkid:j:' opt; do
+		case ${opt} in
+		k)
+			kernel_recompile="no"
+			;;
+		i)
+			update_image="yes"
+			;;
+		d)
+			OUTPUT_DIR="$OPTARG"
+			;;
+		j)
+			NUM_COMPILE_JOBS="$OPTARG"
+			;;
+		h)
+			usage
+			exit 0
+			;;
+		\? )
+			echo "Invalid Option: -$OPTARG"
+			usage
+			exit 1
+			;;
+      		: )
+        		echo "Invalid Option: -$OPTARG requires an argument"
+			usage
+			exit 1
+			;;
+		esac
+	done
+	shift $((OPTIND -1))
+
+	if [[ $# -eq 0 ]]; then
+		echo "No command specified, will run ${DEFAULT_COMMAND} in the vm"
+	else
+		command="$@"
+	fi
+
+	local kconfig_file="${OUTPUT_DIR}/latest.config"
+	local make_command="make -j ${NUM_COMPILE_JOBS} KCONFIG_CONFIG=${kconfig_file}"
+
+	# Figure out where the kernel is being built.
+	# O takes precedence over KBUILD_OUTPUT.
+	if [[ "${O:=""}" != "" ]]; then
+		if is_rel_path "${O}"; then
+			O="$(realpath "${PWD}/${O}")"
+		fi
+		kernel_bzimage="${O}/${X86_BZIMAGE}"
+		make_command="${make_command} O=${O}"
+	elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then
+		if is_rel_path "${KBUILD_OUTPUT}"; then
+			KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")"
+		fi
+		kernel_bzimage="${KBUILD_OUTPUT}/${X86_BZIMAGE}"
+		make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}"
+	fi
+
+	populate_url_map
+
+	local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}"
+	local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
+
+	echo "Output directory: ${OUTPUT_DIR}"
+
+	mkdir -p "${OUTPUT_DIR}"
+	mkdir -p "${mount_dir}"
+	curl -Lf "${KCONFIG_URL}" -o "${kconfig_file}"
+
+	if [[ "${kernel_recompile}" == "no" && ! -f "${kernel_bzimage}" ]]; then
+		echo "Kernel image not found in ${kernel_bzimage}, kernel will be recompiled"
+		kernel_recompile="yes"
+	fi
+
+	if [[ "${kernel_recompile}" == "yes" ]]; then
+		recompile_kernel "${kernel_checkout}" "${make_command}"
+	fi
+
+	if [[ "${update_image}" == "no" && ! -f "${rootfs_img}" ]]; then
+		echo "rootfs image not found in ${rootfs_img}"
+		update_image="yes"
+	fi
+
+	if [[ "${update_image}" == "yes" ]]; then
+		create_vm_image
+	fi
+
+	update_selftests "${kernel_checkout}" "${make_command}"
+	update_init_script "${command}" "${log_file}"
+	run_vm "${kernel_bzimage}"
+	copy_logs "${log_file}"
+	echo "Logs saved in ${OUTPUT_DIR}/${log_file}"
+}
+
+catch()
+{
+	local exit_code=$1
+
+	unmount_image
+	exit ${exit_code}
+}
+
+trap 'catch "$?"' EXIT
+
+main "$@"