diff mbox series

[bpf,3/3] selftest/bpf: Test the read of vsyscall page under x86-64

Message ID 20240119073019.1528573-4-houtao@huaweicloud.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series Fix the read of vsyscall page through bpf | expand

Checks

Context Check Description
bpf/vmtest-bpf-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf
netdev/ynl success SINGLE THREAD; Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present fail Series targets non-next tree, but doesn't contain any Fixes tags
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers success CCed 0 of 0 maintainers
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 85 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc

Commit Message

Hou Tao Jan. 19, 2024, 7:30 a.m. UTC
From: Hou Tao <houtao1@huawei.com>

Using bpf_probe_read_kernel{_str}() or bpf_probe_read{_str}() to read
from vsyscall page under x86-64 will trigger oops, so add one test case
to ensure that the problem is fixed.

Beside those four bpf helpers mentioned above, testing the read of
vsyscall page by using bpf_probe_read_user{_str} and
bpf_copy_from_user{_task}() as well.

vsyscall page could be disabled by CONFIG_LEGACY_VSYSCALL_NONE or
vsyscall=none boot cmd-line, but it doesn't affect the reproduce of the
problem and the returned error codes.

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 .../selftests/bpf/prog_tests/read_vsyscall.c  | 61 +++++++++++++++++++
 .../selftests/bpf/progs/read_vsyscall.c       | 45 ++++++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
 create mode 100644 tools/testing/selftests/bpf/progs/read_vsyscall.c

Comments

Yonghong Song Jan. 22, 2024, 6:30 a.m. UTC | #1
On 1/18/24 11:30 PM, Hou Tao wrote:
> From: Hou Tao <houtao1@huawei.com>
>
> Using bpf_probe_read_kernel{_str}() or bpf_probe_read{_str}() to read
> from vsyscall page under x86-64 will trigger oops, so add one test case
> to ensure that the problem is fixed.
>
> Beside those four bpf helpers mentioned above, testing the read of
> vsyscall page by using bpf_probe_read_user{_str} and
> bpf_copy_from_user{_task}() as well.
>
> vsyscall page could be disabled by CONFIG_LEGACY_VSYSCALL_NONE or
> vsyscall=none boot cmd-line, but it doesn't affect the reproduce of the
> problem and the returned error codes.
>
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
>   .../selftests/bpf/prog_tests/read_vsyscall.c  | 61 +++++++++++++++++++
>   .../selftests/bpf/progs/read_vsyscall.c       | 45 ++++++++++++++
>   2 files changed, 106 insertions(+)
>   create mode 100644 tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
>   create mode 100644 tools/testing/selftests/bpf/progs/read_vsyscall.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
> new file mode 100644
> index 0000000000000..d9247cc89cf3e
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
> @@ -0,0 +1,61 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
> +#include "test_progs.h"
> +#include "read_vsyscall.skel.h"
> +
> +#if defined(__x86_64__)
> +/* For VSYSCALL_ADDR */
> +#include <asm/vsyscall.h>
> +#else
> +/* To prevent build failure on non-x86 arch */
> +#define VSYSCALL_ADDR 0UL
> +#endif
> +
> +struct read_ret_desc {
> +	const char *name;
> +	int ret;
> +} all_read[] = {
> +	{ .name = "probe_read_kernel", .ret = -ERANGE },
> +	{ .name = "probe_read_kernel_str", .ret = -ERANGE },
> +	{ .name = "probe_read", .ret = -ERANGE },
> +	{ .name = "probe_read_str", .ret = -ERANGE },
> +	/* __access_ok() will fail */
> +	{ .name = "probe_read_user", .ret = -EFAULT },
> +	/* __access_ok() will fail */
> +	{ .name = "probe_read_user_str", .ret = -EFAULT },
> +	/* access_ok() will fail */
> +	{ .name = "copy_from_user", .ret = -EFAULT },
> +	/* both vma_lookup() and expand_stack() will fail */
> +	{ .name = "copy_from_user_task", .ret = -EFAULT },

The above comments are not clear enough. For example,
'__access_ok() will fail', user will need to
check the source code where __access_ok() is and
this could be hard e.g., for probe_read_user_str().
Another example, 'both vma_lookup() and expand_stack() will fail',
where is vma_lookup()/expand_stack()? User needs to further
check to make sense.

I suggest remove the above comments and add more
detailed explanation in commit messages with callstack
indicating where the fail/error return happens.

> +};
> +
> +void test_read_vsyscall(void)
> +{
> +	struct read_vsyscall *skel;
> +	unsigned int i;
> +	int err;
> +
> +#if !defined(__x86_64__)
> +	test__skip();
> +	return;
> +#endif
> +	skel = read_vsyscall__open_and_load();
> +	if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load"))
> +		return;
> +
> +	skel->bss->target_pid = getpid();
> +	err = read_vsyscall__attach(skel);
> +	if (!ASSERT_EQ(err, 0, "read_vsyscall attach"))
> +		goto out;
> +
> +	/* userspace may don't have vsyscall page due to LEGACY_VSYSCALL_NONE,
> +	 * but it doesn't affect the returned error codes.
> +	 */
> +	skel->bss->user_ptr = (void *)VSYSCALL_ADDR;
> +	usleep(1);
> +
> +	for (i = 0; i < ARRAY_SIZE(all_read); i++)
> +		ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name);
> +out:
> +	read_vsyscall__destroy(skel);
> +}
[...]
Sohil Mehta Jan. 23, 2024, 12:25 a.m. UTC | #2
On 1/18/2024 11:30 PM, Hou Tao wrote:

> vsyscall page could be disabled by CONFIG_LEGACY_VSYSCALL_NONE or
> vsyscall=none boot cmd-line, but it doesn't affect the reproduce of the
> problem and the returned error codes.
> 

With vsyscall=emulate a direct read of the vsyscall address from
userspace is expected to go through. This is mode deprecated so maybe it
wouldn't matter much. Without the fix in patch 2/3, do you see the same
behavior with vsyscall=emulate set in the cmdline?

Sohil
Hou Tao Jan. 25, 2024, 6:37 a.m. UTC | #3
Hi,

On 1/22/2024 2:30 PM, Yonghong Song wrote:
>
> On 1/18/24 11:30 PM, Hou Tao wrote:
>> From: Hou Tao <houtao1@huawei.com>
>>
>> Using bpf_probe_read_kernel{_str}() or bpf_probe_read{_str}() to read
>> from vsyscall page under x86-64 will trigger oops, so add one test case
>> to ensure that the problem is fixed.
>>
>> Beside those four bpf helpers mentioned above, testing the read of
>> vsyscall page by using bpf_probe_read_user{_str} and
>> bpf_copy_from_user{_task}() as well.
>>
>> vsyscall page could be disabled by CONFIG_LEGACY_VSYSCALL_NONE or
>> vsyscall=none boot cmd-line, but it doesn't affect the reproduce of the
>> problem and the returned error codes.
>>
>> Signed-off-by: Hou Tao <houtao1@huawei.com>
>> ---
>>   .../selftests/bpf/prog_tests/read_vsyscall.c  | 61 +++++++++++++++++++
>>   .../selftests/bpf/progs/read_vsyscall.c       | 45 ++++++++++++++
>>   2 files changed, 106 insertions(+)
>>   create mode 100644
>> tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
>>   create mode 100644 tools/testing/selftests/bpf/progs/read_vsyscall.c
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
>> b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
>> new file mode 100644
>> index 0000000000000..d9247cc89cf3e
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
>> @@ -0,0 +1,61 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
>> +#include "test_progs.h"
>> +#include "read_vsyscall.skel.h"
>> +
>> +#if defined(__x86_64__)
>> +/* For VSYSCALL_ADDR */
>> +#include <asm/vsyscall.h>
>> +#else
>> +/* To prevent build failure on non-x86 arch */
>> +#define VSYSCALL_ADDR 0UL
>> +#endif
>> +
>> +struct read_ret_desc {
>> +    const char *name;
>> +    int ret;
>> +} all_read[] = {
>> +    { .name = "probe_read_kernel", .ret = -ERANGE },
>> +    { .name = "probe_read_kernel_str", .ret = -ERANGE },
>> +    { .name = "probe_read", .ret = -ERANGE },
>> +    { .name = "probe_read_str", .ret = -ERANGE },
>> +    /* __access_ok() will fail */
>> +    { .name = "probe_read_user", .ret = -EFAULT },
>> +    /* __access_ok() will fail */
>> +    { .name = "probe_read_user_str", .ret = -EFAULT },
>> +    /* access_ok() will fail */
>> +    { .name = "copy_from_user", .ret = -EFAULT },
>> +    /* both vma_lookup() and expand_stack() will fail */
>> +    { .name = "copy_from_user_task", .ret = -EFAULT },
>
> The above comments are not clear enough. For example,
> '__access_ok() will fail', user will need to
> check the source code where __access_ok() is and
> this could be hard e.g., for probe_read_user_str().
> Another example, 'both vma_lookup() and expand_stack() will fail',
> where is vma_lookup()/expand_stack()? User needs to further
> check to make sense.

Yes. These comment are highly coupled with the implementation.
>
> I suggest remove the above comments and add more
> detailed explanation in commit messages with callstack
> indicating where the fail/error return happens.

Will do in v2. Thanks for the suggestions.
>
>> +};
>> +
>> +void test_read_vsyscall(void)
>> +{
>> +    struct read_vsyscall *skel;
>> +    unsigned int i;
>> +    int err;
>> +
>> +#if !defined(__x86_64__)
>> +    test__skip();
>> +    return;
>> +#endif
>> +    skel = read_vsyscall__open_and_load();
>> +    if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load"))
>> +        return;
>> +
>> +    skel->bss->target_pid = getpid();
>> +    err = read_vsyscall__attach(skel);
>> +    if (!ASSERT_EQ(err, 0, "read_vsyscall attach"))
>> +        goto out;
>> +
>> +    /* userspace may don't have vsyscall page due to
>> LEGACY_VSYSCALL_NONE,
>> +     * but it doesn't affect the returned error codes.
>> +     */
>> +    skel->bss->user_ptr = (void *)VSYSCALL_ADDR;
>> +    usleep(1);
>> +
>> +    for (i = 0; i < ARRAY_SIZE(all_read); i++)
>> +        ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret,
>> all_read[i].name);
>> +out:
>> +    read_vsyscall__destroy(skel);
>> +}
> [...]
Hou Tao Jan. 25, 2024, 7:54 a.m. UTC | #4
On 1/23/2024 8:25 AM, Sohil Mehta wrote:
> On 1/18/2024 11:30 PM, Hou Tao wrote:
>
>> vsyscall page could be disabled by CONFIG_LEGACY_VSYSCALL_NONE or
>> vsyscall=none boot cmd-line, but it doesn't affect the reproduce of the
>> problem and the returned error codes.
>>
> With vsyscall=emulate a direct read of the vsyscall address from
> userspace is expected to go through. This is mode deprecated so maybe it
> wouldn't matter much. Without the fix in patch 2/3, do you see the same
> behavior with vsyscall=emulate set in the cmdline?

Er, I think it depends on whether or not SMAP [1] feature is available.
When SMAP feature is enabled, even the vsyscall page is populated,
reading the vsyscall page through bpf_read_kernel() will trigger a page
fault and then oops. But when there is not SMAP, bpf_read_kernel() will
succeed. So I think the test may need to be skipped if vsyscall_mode is
emulate.

[1]: https://en.wikipedia.org/wiki/Supervisor_Mode_Access_Prevention
>
> Sohil
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
new file mode 100644
index 0000000000000..d9247cc89cf3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
@@ -0,0 +1,61 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
+#include "test_progs.h"
+#include "read_vsyscall.skel.h"
+
+#if defined(__x86_64__)
+/* For VSYSCALL_ADDR */
+#include <asm/vsyscall.h>
+#else
+/* To prevent build failure on non-x86 arch */
+#define VSYSCALL_ADDR 0UL
+#endif
+
+struct read_ret_desc {
+	const char *name;
+	int ret;
+} all_read[] = {
+	{ .name = "probe_read_kernel", .ret = -ERANGE },
+	{ .name = "probe_read_kernel_str", .ret = -ERANGE },
+	{ .name = "probe_read", .ret = -ERANGE },
+	{ .name = "probe_read_str", .ret = -ERANGE },
+	/* __access_ok() will fail */
+	{ .name = "probe_read_user", .ret = -EFAULT },
+	/* __access_ok() will fail */
+	{ .name = "probe_read_user_str", .ret = -EFAULT },
+	/* access_ok() will fail */
+	{ .name = "copy_from_user", .ret = -EFAULT },
+	/* both vma_lookup() and expand_stack() will fail */
+	{ .name = "copy_from_user_task", .ret = -EFAULT },
+};
+
+void test_read_vsyscall(void)
+{
+	struct read_vsyscall *skel;
+	unsigned int i;
+	int err;
+
+#if !defined(__x86_64__)
+	test__skip();
+	return;
+#endif
+	skel = read_vsyscall__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load"))
+		return;
+
+	skel->bss->target_pid = getpid();
+	err = read_vsyscall__attach(skel);
+	if (!ASSERT_EQ(err, 0, "read_vsyscall attach"))
+		goto out;
+
+	/* userspace may don't have vsyscall page due to LEGACY_VSYSCALL_NONE,
+	 * but it doesn't affect the returned error codes.
+	 */
+	skel->bss->user_ptr = (void *)VSYSCALL_ADDR;
+	usleep(1);
+
+	for (i = 0; i < ARRAY_SIZE(all_read); i++)
+		ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name);
+out:
+	read_vsyscall__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c
new file mode 100644
index 0000000000000..986f96687ae15
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c
@@ -0,0 +1,45 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+int target_pid = 0;
+void *user_ptr = 0;
+int read_ret[8];
+
+char _license[] SEC("license") = "GPL";
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int do_probe_read(void *ctx)
+{
+	char buf[8];
+
+	if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+		return 0;
+
+	read_ret[0] = bpf_probe_read_kernel(buf, sizeof(buf), user_ptr);
+	read_ret[1] = bpf_probe_read_kernel_str(buf, sizeof(buf), user_ptr);
+	read_ret[2] = bpf_probe_read(buf, sizeof(buf), user_ptr);
+	read_ret[3] = bpf_probe_read_str(buf, sizeof(buf), user_ptr);
+	read_ret[4] = bpf_probe_read_user(buf, sizeof(buf), user_ptr);
+	read_ret[5] = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr);
+
+	return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int do_copy_from_user(void *ctx)
+{
+	char buf[8];
+
+	if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+		return 0;
+
+	read_ret[6] = bpf_copy_from_user(buf, sizeof(buf), user_ptr);
+	read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr,
+					      bpf_get_current_task_btf(), 0);
+
+	return 0;
+}