diff mbox series

[bpf,v2,6/7] bpf: Use __u64 to save the bits in bits iterator

Message ID 20241021014004.1647816-7-houtao@huaweicloud.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Misc fixes for bpf | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 9 this patch: 9
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 13 of 13 maintainers
netdev/build_clang success Errors and warnings before: 7 this patch: 7
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 61 this patch: 61
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 62 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 10 this patch: 10
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-7 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-VM_Test-14 fail Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-13 fail Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-21 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-22 fail Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-30 fail Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-31 fail Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-PR fail PR summary
bpf/vmtest-bpf-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-37 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-38 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-39 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Hou Tao Oct. 21, 2024, 1:40 a.m. UTC
From: Hou Tao <houtao1@huawei.com>

On 32-bit hosts (e.g., arm32), when a bpf program passes a u64 to
bpf_iter_bits_new(), bpf_iter_bits_new() will use bits_copy to store the
content of the u64. However, bits_copy is only 4 bytes, leading to stack
corruption.

The straightforward solution would be to replace u64 with unsigned long
in bpf_iter_bits_new(). However, this introduces confusion and problems
for 32-bit hosts because the size of ulong in bpf program is 8 bytes,
but it is treated as 4-bytes after passed to bpf_iter_bits_new().

Fix it by changing the type of both bits and bit_count from unsigned
long to u64. However, the change is not enough. The main reason is that
bpf_iter_bits_next() uses find_next_bit() to find the next bit and the
pointer passed to find_next_bit() is an unsigned long pointer instead
of a u64 pointer. For 32-bit little-endian host, it is fine but it is
not the case for 32-bit big-endian host. Because under 32-bit big-endian
host, the first iterated unsigned long will be the bits 32-63 of the u64
instead of the expected bits 0-31. Therefore, in addition to changing
the type, swap the two unsigned longs within the u64 for 32-bit
big-endian host.

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 kernel/bpf/helpers.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

Comments

Yafang Shao Oct. 23, 2024, 3:10 a.m. UTC | #1
On Mon, Oct 21, 2024 at 9:28 AM Hou Tao <houtao@huaweicloud.com> wrote:
>
> From: Hou Tao <houtao1@huawei.com>
>
> On 32-bit hosts (e.g., arm32), when a bpf program passes a u64 to
> bpf_iter_bits_new(), bpf_iter_bits_new() will use bits_copy to store the
> content of the u64. However, bits_copy is only 4 bytes, leading to stack
> corruption.
>
> The straightforward solution would be to replace u64 with unsigned long
> in bpf_iter_bits_new(). However, this introduces confusion and problems
> for 32-bit hosts because the size of ulong in bpf program is 8 bytes,
> but it is treated as 4-bytes after passed to bpf_iter_bits_new().
>
> Fix it by changing the type of both bits and bit_count from unsigned
> long to u64.

Thank you for the fix. This change is necessary.

>  However, the change is not enough. The main reason is that
> bpf_iter_bits_next() uses find_next_bit() to find the next bit and the
> pointer passed to find_next_bit() is an unsigned long pointer instead
> of a u64 pointer. For 32-bit little-endian host, it is fine but it is
> not the case for 32-bit big-endian host. Because under 32-bit big-endian
> host, the first iterated unsigned long will be the bits 32-63 of the u64
> instead of the expected bits 0-31. Therefore, in addition to changing
> the type, swap the two unsigned longs within the u64 for 32-bit
> big-endian host.

The API uses a u64 data type, and the nr_words parameter represents
the number of 8-byte units. On a 32-bit system, if you want to call
this API, you would define an array like `u32 data[2]` and invoke the
function as `bpf_for_each(bits, bit, &data[0], 1)`. However, since the
API expects a u64, you'll need to merge the two u32 values into a
single u64 value.

Given this, it might be more appropriate to ask users to handle the
u32 to u64 merge on their side when preparing the data, rather than
performing the swap within the kernel itself.

--
Regards

Yafang
Hou Tao Oct. 23, 2024, 8:09 a.m. UTC | #2
Hi,

On 10/23/2024 11:10 AM, Yafang Shao wrote:
> On Mon, Oct 21, 2024 at 9:28 AM Hou Tao <houtao@huaweicloud.com> wrote:
>> From: Hou Tao <houtao1@huawei.com>
>>
>> On 32-bit hosts (e.g., arm32), when a bpf program passes a u64 to
>> bpf_iter_bits_new(), bpf_iter_bits_new() will use bits_copy to store the
>> content of the u64. However, bits_copy is only 4 bytes, leading to stack
>> corruption.
>>
>> The straightforward solution would be to replace u64 with unsigned long
>> in bpf_iter_bits_new(). However, this introduces confusion and problems
>> for 32-bit hosts because the size of ulong in bpf program is 8 bytes,
>> but it is treated as 4-bytes after passed to bpf_iter_bits_new().
>>
>> Fix it by changing the type of both bits and bit_count from unsigned
>> long to u64.
> Thank you for the fix. This change is necessary.
>
>>  However, the change is not enough. The main reason is that
>> bpf_iter_bits_next() uses find_next_bit() to find the next bit and the
>> pointer passed to find_next_bit() is an unsigned long pointer instead
>> of a u64 pointer. For 32-bit little-endian host, it is fine but it is
>> not the case for 32-bit big-endian host. Because under 32-bit big-endian
>> host, the first iterated unsigned long will be the bits 32-63 of the u64
>> instead of the expected bits 0-31. Therefore, in addition to changing
>> the type, swap the two unsigned longs within the u64 for 32-bit
>> big-endian host.
> The API uses a u64 data type, and the nr_words parameter represents
> the number of 8-byte units. On a 32-bit system, if you want to call
> this API, you would define an array like `u32 data[2]` and invoke the
> function as `bpf_for_each(bits, bit, &data[0], 1)`. However, since the
> API expects a u64, you'll need to merge the two u32 values into a
> single u64 value.

It is a bit weird to pass a u32 pointer to bpf_for_each, because it
expects a u64 pointer. I think the bpf program should pass a u64 pointer
instead.
>
> Given this, it might be more appropriate to ask users to handle the
> u32 to u64 merge on their side when preparing the data, rather than
> performing the swap within the kernel itself.

However, the swap implemented in the patch has nothing to do whether the
user passes pointer to u32 array or not. It is necessary due to the
mismatched between the pointer type used by find_next_bit and the
pointer used by bpf_iter_bits_new(). The latter uses u64 pointer, but
find_next_bit() uses unsigned long pointer and iterates a long each
time. So just like the comment in the patch said:

    under 32-bit big-endian host, the first iterated unsigned long will
be the bits 32-63 of the u64 instead of the expected bits 0-31.

The swap in the patch will swap the two long values in the u64 and make
the first iterated unsigned long will be the bits 0-31 of the u64 value.
>
> --
> Regards
>
> Yafang
diff mbox series

Patch

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index c147f75e1b48..0ad85201fb84 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2855,13 +2855,36 @@  struct bpf_iter_bits {
 
 struct bpf_iter_bits_kern {
 	union {
-		unsigned long *bits;
-		unsigned long bits_copy;
+		__u64 *bits;
+		__u64 bits_copy;
 	};
 	u32 nr_bits;
 	int bit;
 } __aligned(8);
 
+/* On 64-bit hosts, unsigned long and u64 have the same size, so passing
+ * a u64 pointer and an unsigned long pointer to find_next_bit() will
+ * return the same result, as both point to the same 8-byte area.
+ *
+ * For 32-bit little-endian hosts, using a u64 pointer or unsigned long
+ * pointer also makes no difference. This is because the first iterated
+ * unsigned long is composed of bits 0-31 of the u64 and the second unsigned
+ * long is composed of bits 32-63 of the u64.
+ *
+ * However, for 32-bit big-endian hosts, this is not the case. The first
+ * iterated unsigned long will be bits 32-63 of the u64, so swap these two
+ * ulong values within the u64.
+ */
+static void swap_ulong_in_u64(u64 *bits, unsigned int nr)
+{
+#if !defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
+	unsigned int i;
+
+	for (i = 0; i < nr; i++)
+		bits[i] = (bits[i] >> 32) | ((u64)(u32)bits[i] << 32);
+#endif
+}
+
 /**
  * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
  * @it: The new bpf_iter_bits to be created
@@ -2903,6 +2926,8 @@  bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
 		if (err)
 			return -EFAULT;
 
+		swap_ulong_in_u64(&kit->bits_copy, nr_words);
+
 		kit->nr_bits = nr_bits;
 		return 0;
 	}
@@ -2918,6 +2943,8 @@  bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
 		return err;
 	}
 
+	swap_ulong_in_u64(kit->bits, nr_words);
+
 	kit->nr_bits = nr_bits;
 	return 0;
 }
@@ -2935,7 +2962,7 @@  __bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
 {
 	struct bpf_iter_bits_kern *kit = (void *)it;
 	u32 nr_bits = kit->nr_bits;
-	const unsigned long *bits;
+	const void *bits;
 	int bit;
 
 	if (kit->bit >= nr_bits)