diff mbox series

[bpf-next,1/2] bpf: Add arm64 JIT support for PROBE_MEM32 pseudo instructions.

Message ID 20240314150003.123020-2-puranjay12@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf,arm64: Add support for BPF Arena | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 1 maintainers not CCed: linux-arm-kernel@lists.infradead.org
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-9 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-15 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test
bpf/vmtest-bpf-next-VM_Test-8 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-16 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization

Commit Message

Puranjay Mohan March 14, 2024, 3 p.m. UTC
Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
instructions.  They are similar to PROBE_MEM instructions with the
following differences:
- PROBE_MEM32 supports store.
- PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
  src/dst register
- PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28
  in the prologue). Due to bpf_arena constructions such R28 + reg +
  off16 access is guaranteed to be within arena virtual range, so no
  address check at run-time.
- PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
  LDX faults the destination register is zeroed.

To support these on arm64, we do tmp2 = R28 + src/dst reg and then use
tmp2 as the new src/dst register. This allows us to reuse most of the
code for normal [LDX | STX | ST].

Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
---
 arch/arm64/net/bpf_jit_comp.c | 70 ++++++++++++++++++++++++++++++-----
 1 file changed, 60 insertions(+), 10 deletions(-)

Comments

Kumar Kartikeya Dwivedi March 14, 2024, 5:07 p.m. UTC | #1
On Thu, 14 Mar 2024 at 16:00, Puranjay Mohan <puranjay12@gmail.com> wrote:
>
> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
> instructions.  They are similar to PROBE_MEM instructions with the
> following differences:
> - PROBE_MEM32 supports store.
> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
>   src/dst register
> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28
>   in the prologue). Due to bpf_arena constructions such R28 + reg +
>   off16 access is guaranteed to be within arena virtual range, so no
>   address check at run-time.
> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
>   LDX faults the destination register is zeroed.
>
> To support these on arm64, we do tmp2 = R28 + src/dst reg and then use
> tmp2 as the new src/dst register. This allows us to reuse most of the
> code for normal [LDX | STX | ST].
>
> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
> ---

Hi Alexei,
Puranjay and I were discussing this stuff off list and noticed that
atomic instructions are not handled.
It turns out that will cause a kernel crash right now because the
32-bit offset into arena will be dereferenced directly.

e.g. something like this:

@@ -55,6 +56,7 @@ int arena_list_add(void *ctx)
                test_val++;
                n->value = i;
                arena_sum += i;
+               __sync_fetch_and_add(&arena_sum, 0);
                list_add_head(&n->node, list_head);
        }
 #else

I will try to prepare a fix for the x86 JIT. Puranjay will do the same
for his set.
Puranjay Mohan March 14, 2024, 5:13 p.m. UTC | #2
Kumar Kartikeya Dwivedi <memxor@gmail.com> writes:

> On Thu, 14 Mar 2024 at 16:00, Puranjay Mohan <puranjay12@gmail.com> wrote:
>>
>> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
>> instructions.  They are similar to PROBE_MEM instructions with the
>> following differences:
>> - PROBE_MEM32 supports store.
>> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
>>   src/dst register
>> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28
>>   in the prologue). Due to bpf_arena constructions such R28 + reg +
>>   off16 access is guaranteed to be within arena virtual range, so no
>>   address check at run-time.
>> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
>>   LDX faults the destination register is zeroed.
>>
>> To support these on arm64, we do tmp2 = R28 + src/dst reg and then use
>> tmp2 as the new src/dst register. This allows us to reuse most of the
>> code for normal [LDX | STX | ST].
>>
>> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
>> ---
>
> Hi Alexei,
> Puranjay and I were discussing this stuff off list and noticed that
> atomic instructions are not handled.
> It turns out that will cause a kernel crash right now because the
> 32-bit offset into arena will be dereferenced directly.
>
> e.g. something like this:
>
> @@ -55,6 +56,7 @@ int arena_list_add(void *ctx)
>                 test_val++;
>                 n->value = i;
>                 arena_sum += i;
> +               __sync_fetch_and_add(&arena_sum, 0);
>                 list_add_head(&n->node, list_head);
>         }
>  #else
>
> I will try to prepare a fix for the x86 JIT. Puranjay will do the same
> for his set.

Yes, testing the change mentioned by Kumar on ARM64 causes a crashes as well:

bpf_testmod: loading out-of-tree module taints kernel.
 bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel
 Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
 Mem abort info:
   ESR = 0x0000000096000006
   EC = 0x25: DABT (current EL), IL = 32 bits
   SET = 0, FnV = 0
   EA = 0, S1PTW = 0
   FSC = 0x06: level 2 translation fault
 Data abort info:
   ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000
   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
 user pgtable: 4k pages, 48-bit VAs, pgdp=00000004043cc000
 [0000000000000010] pgd=0800000410d8f003, p4d=0800000410d8f003, pud=0800000405972003, pmd=0000000000000000
 Internal error: Oops: 0000000096000006 [#1] SMP
 Modules linked in: bpf_testmod(OE) nls_ascii nls_cp437 sunrpc vfat fat aes_ce_blk aes_ce_cipher ghash_ce sha1_ce button sch_fq_codel dm_mod dax configfs dmi_sysfs sha2_ce sha256_arm64 efivarfs
 CPU: 8 PID: 5631 Comm: test_progs Tainted: G           OE      6.8.0+ #2
 Hardware name: Amazon EC2 c6g.16xlarge/, BIOS 1.0 11/1/2018
 pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
 pc : bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8
 lr : bpf_prog_8771c336cb6a18eb_arena_list_add+0x144/0x2b8
 sp : ffff80008b84bc30
 x29: ffff80008b84bca0 x28: ffff8000a5008000 x27: ffff80008b84bc38
 x26: 0000000000000000 x25: ffff80008b84bc60 x24: 0000000000000000
 x23: 0000000000000000 x22: 0000000000000058 x21: 0000000000000838
 x20: 0000000000000000 x19: 0000000100001fe0 x18: 0000000000000000
 x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffcc66d2c8
 x14: 0000000000000000 x13: 0000000000000000 x12: 000000000004058c
 x11: ffff8000a5008010 x10: 00000000ffffffff x9 : 00000000000002cf
 x8 : ffff800082ff4ab8 x7 : 0000000100001000 x6 : 0000000000000001
 x5 : 0000000010e5e3fd x4 : 000000003619b978 x3 : 0000000000000010
 x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000001fe0
 Call trace:
  bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8
  bpf_prog_test_run_syscall+0x100/0x340
  __sys_bpf+0x8e8/0xa20
  __arm64_sys_bpf+0x2c/0x48
  invoke_syscall+0x50/0x128
  el0_svc_common.constprop.0+0x48/0xf8
  do_el0_svc+0x28/0x40
  el0_svc+0x58/0x190
  el0t_64_sync_handler+0x13c/0x158
  el0t_64_sync+0x1a8/0x1b0
 Code: 8b010042 8b1c006b f9000162 d2800001 (f821307f)
 ---[ end trace 0000000000000000 ]---
 Kernel panic - not syncing: Oops: Fatal exception
 SMP: stopping secondary CPUs
 Kernel Offset: disabled
 CPU features: 0x0,00000120,7002014a,21407a0b
 Memory Limit: none
 Rebooting in 5 seconds..

I will send v2 with the arm64 JIT fix, but I guess verifier has to be modified
as well to add BPF_PROBE_MEM32 to atomic instructions.

Thanks,
Puranjay
Alexei Starovoitov March 14, 2024, 5:21 p.m. UTC | #3
On Thu, Mar 14, 2024 at 10:13 AM Puranjay Mohan <puranjay12@gmail.com> wrote:
>
> Kumar Kartikeya Dwivedi <memxor@gmail.com> writes:
>
> > On Thu, 14 Mar 2024 at 16:00, Puranjay Mohan <puranjay12@gmail.com> wrote:
> >>
> >> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
> >> instructions.  They are similar to PROBE_MEM instructions with the
> >> following differences:
> >> - PROBE_MEM32 supports store.
> >> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
> >>   src/dst register
> >> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28
> >>   in the prologue). Due to bpf_arena constructions such R28 + reg +
> >>   off16 access is guaranteed to be within arena virtual range, so no
> >>   address check at run-time.
> >> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
> >>   LDX faults the destination register is zeroed.
> >>
> >> To support these on arm64, we do tmp2 = R28 + src/dst reg and then use
> >> tmp2 as the new src/dst register. This allows us to reuse most of the
> >> code for normal [LDX | STX | ST].
> >>
> >> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
> >> ---
> >
> > Hi Alexei,
> > Puranjay and I were discussing this stuff off list and noticed that
> > atomic instructions are not handled.
> > It turns out that will cause a kernel crash right now because the
> > 32-bit offset into arena will be dereferenced directly.
> >
> > e.g. something like this:
> >
> > @@ -55,6 +56,7 @@ int arena_list_add(void *ctx)
> >                 test_val++;
> >                 n->value = i;
> >                 arena_sum += i;
> > +               __sync_fetch_and_add(&arena_sum, 0);
> >                 list_add_head(&n->node, list_head);
> >         }
> >  #else
> >
> > I will try to prepare a fix for the x86 JIT. Puranjay will do the same
> > for his set.
>
> Yes, testing the change mentioned by Kumar on ARM64 causes a crashes as well:
>
> bpf_testmod: loading out-of-tree module taints kernel.
>  bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel
>  Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
>  Mem abort info:
>    ESR = 0x0000000096000006
>    EC = 0x25: DABT (current EL), IL = 32 bits
>    SET = 0, FnV = 0
>    EA = 0, S1PTW = 0
>    FSC = 0x06: level 2 translation fault
>  Data abort info:
>    ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000
>    CM = 0, WnR = 0, TnD = 0, TagAccess = 0
>    GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
>  user pgtable: 4k pages, 48-bit VAs, pgdp=00000004043cc000
>  [0000000000000010] pgd=0800000410d8f003, p4d=0800000410d8f003, pud=0800000405972003, pmd=0000000000000000
>  Internal error: Oops: 0000000096000006 [#1] SMP
>  Modules linked in: bpf_testmod(OE) nls_ascii nls_cp437 sunrpc vfat fat aes_ce_blk aes_ce_cipher ghash_ce sha1_ce button sch_fq_codel dm_mod dax configfs dmi_sysfs sha2_ce sha256_arm64 efivarfs
>  CPU: 8 PID: 5631 Comm: test_progs Tainted: G           OE      6.8.0+ #2
>  Hardware name: Amazon EC2 c6g.16xlarge/, BIOS 1.0 11/1/2018
>  pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
>  pc : bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8
>  lr : bpf_prog_8771c336cb6a18eb_arena_list_add+0x144/0x2b8
>  sp : ffff80008b84bc30
>  x29: ffff80008b84bca0 x28: ffff8000a5008000 x27: ffff80008b84bc38
>  x26: 0000000000000000 x25: ffff80008b84bc60 x24: 0000000000000000
>  x23: 0000000000000000 x22: 0000000000000058 x21: 0000000000000838
>  x20: 0000000000000000 x19: 0000000100001fe0 x18: 0000000000000000
>  x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffcc66d2c8
>  x14: 0000000000000000 x13: 0000000000000000 x12: 000000000004058c
>  x11: ffff8000a5008010 x10: 00000000ffffffff x9 : 00000000000002cf
>  x8 : ffff800082ff4ab8 x7 : 0000000100001000 x6 : 0000000000000001
>  x5 : 0000000010e5e3fd x4 : 000000003619b978 x3 : 0000000000000010
>  x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000001fe0
>  Call trace:
>   bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8
>   bpf_prog_test_run_syscall+0x100/0x340
>   __sys_bpf+0x8e8/0xa20
>   __arm64_sys_bpf+0x2c/0x48
>   invoke_syscall+0x50/0x128
>   el0_svc_common.constprop.0+0x48/0xf8
>   do_el0_svc+0x28/0x40
>   el0_svc+0x58/0x190
>   el0t_64_sync_handler+0x13c/0x158
>   el0t_64_sync+0x1a8/0x1b0
>  Code: 8b010042 8b1c006b f9000162 d2800001 (f821307f)
>  ---[ end trace 0000000000000000 ]---
>  Kernel panic - not syncing: Oops: Fatal exception
>  SMP: stopping secondary CPUs
>  Kernel Offset: disabled
>  CPU features: 0x0,00000120,7002014a,21407a0b
>  Memory Limit: none
>  Rebooting in 5 seconds..
>
> I will send v2 with the arm64 JIT fix, but I guess verifier has to be modified
> as well to add BPF_PROBE_MEM32 to atomic instructions.

The JIT and the verifier changes for atomics might be too big.
Let's disable atomics in arena in the verifier for now.
Pls send a patch.
Puranjay Mohan March 15, 2024, 10:31 a.m. UTC | #4
On Thu, Mar 14, 2024 at 6:21 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Thu, Mar 14, 2024 at 10:13 AM Puranjay Mohan <puranjay12@gmail.com> wrote:
> >
> > Kumar Kartikeya Dwivedi <memxor@gmail.com> writes:
> >
> > > On Thu, 14 Mar 2024 at 16:00, Puranjay Mohan <puranjay12@gmail.com> wrote:
> > >>
> > >> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
> > >> instructions.  They are similar to PROBE_MEM instructions with the
> > >> following differences:
> > >> - PROBE_MEM32 supports store.
> > >> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
> > >>   src/dst register
> > >> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28
> > >>   in the prologue). Due to bpf_arena constructions such R28 + reg +
> > >>   off16 access is guaranteed to be within arena virtual range, so no
> > >>   address check at run-time.
> > >> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
> > >>   LDX faults the destination register is zeroed.
> > >>
> > >> To support these on arm64, we do tmp2 = R28 + src/dst reg and then use
> > >> tmp2 as the new src/dst register. This allows us to reuse most of the
> > >> code for normal [LDX | STX | ST].
> > >>
> > >> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
> > >> ---
> > >
> > > Hi Alexei,
> > > Puranjay and I were discussing this stuff off list and noticed that
> > > atomic instructions are not handled.
> > > It turns out that will cause a kernel crash right now because the
> > > 32-bit offset into arena will be dereferenced directly.
> > >
> > > e.g. something like this:
> > >
> > > @@ -55,6 +56,7 @@ int arena_list_add(void *ctx)
> > >                 test_val++;
> > >                 n->value = i;
> > >                 arena_sum += i;
> > > +               __sync_fetch_and_add(&arena_sum, 0);
> > >                 list_add_head(&n->node, list_head);
> > >         }
> > >  #else
> > >
> > > I will try to prepare a fix for the x86 JIT. Puranjay will do the same
> > > for his set.
> >
> > Yes, testing the change mentioned by Kumar on ARM64 causes a crashes as well:
> >
> > bpf_testmod: loading out-of-tree module taints kernel.
> >  bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel
> >  Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
> >  Mem abort info:
> >    ESR = 0x0000000096000006
> >    EC = 0x25: DABT (current EL), IL = 32 bits
> >    SET = 0, FnV = 0
> >    EA = 0, S1PTW = 0
> >    FSC = 0x06: level 2 translation fault
> >  Data abort info:
> >    ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000
> >    CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> >    GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> >  user pgtable: 4k pages, 48-bit VAs, pgdp=00000004043cc000
> >  [0000000000000010] pgd=0800000410d8f003, p4d=0800000410d8f003, pud=0800000405972003, pmd=0000000000000000
> >  Internal error: Oops: 0000000096000006 [#1] SMP
> >  Modules linked in: bpf_testmod(OE) nls_ascii nls_cp437 sunrpc vfat fat aes_ce_blk aes_ce_cipher ghash_ce sha1_ce button sch_fq_codel dm_mod dax configfs dmi_sysfs sha2_ce sha256_arm64 efivarfs
> >  CPU: 8 PID: 5631 Comm: test_progs Tainted: G           OE      6.8.0+ #2
> >  Hardware name: Amazon EC2 c6g.16xlarge/, BIOS 1.0 11/1/2018
> >  pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> >  pc : bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8
> >  lr : bpf_prog_8771c336cb6a18eb_arena_list_add+0x144/0x2b8
> >  sp : ffff80008b84bc30
> >  x29: ffff80008b84bca0 x28: ffff8000a5008000 x27: ffff80008b84bc38
> >  x26: 0000000000000000 x25: ffff80008b84bc60 x24: 0000000000000000
> >  x23: 0000000000000000 x22: 0000000000000058 x21: 0000000000000838
> >  x20: 0000000000000000 x19: 0000000100001fe0 x18: 0000000000000000
> >  x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffcc66d2c8
> >  x14: 0000000000000000 x13: 0000000000000000 x12: 000000000004058c
> >  x11: ffff8000a5008010 x10: 00000000ffffffff x9 : 00000000000002cf
> >  x8 : ffff800082ff4ab8 x7 : 0000000100001000 x6 : 0000000000000001
> >  x5 : 0000000010e5e3fd x4 : 000000003619b978 x3 : 0000000000000010
> >  x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000001fe0
> >  Call trace:
> >   bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8
> >   bpf_prog_test_run_syscall+0x100/0x340
> >   __sys_bpf+0x8e8/0xa20
> >   __arm64_sys_bpf+0x2c/0x48
> >   invoke_syscall+0x50/0x128
> >   el0_svc_common.constprop.0+0x48/0xf8
> >   do_el0_svc+0x28/0x40
> >   el0_svc+0x58/0x190
> >   el0t_64_sync_handler+0x13c/0x158
> >   el0t_64_sync+0x1a8/0x1b0
> >  Code: 8b010042 8b1c006b f9000162 d2800001 (f821307f)
> >  ---[ end trace 0000000000000000 ]---
> >  Kernel panic - not syncing: Oops: Fatal exception
> >  SMP: stopping secondary CPUs
> >  Kernel Offset: disabled
> >  CPU features: 0x0,00000120,7002014a,21407a0b
> >  Memory Limit: none
> >  Rebooting in 5 seconds..
> >
> > I will send v2 with the arm64 JIT fix, but I guess verifier has to be modified
> > as well to add BPF_PROBE_MEM32 to atomic instructions.
>
> The JIT and the verifier changes for atomics might be too big.
> Let's disable atomics in arena in the verifier for now.
> Pls send a patch.

As atomics are disabled in the Arena now, this series will not require
any changes.
Looking forward to the reviews.

Thanks,
Puranjay
diff mbox series

Patch

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index c5b461dda438..ce66c17b73a0 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -29,6 +29,7 @@ 
 #define TCALL_CNT (MAX_BPF_JIT_REG + 2)
 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
 #define FP_BOTTOM (MAX_BPF_JIT_REG + 4)
+#define PROBE_MEM32_BASE (MAX_BPF_JIT_REG + 5)
 
 #define check_imm(bits, imm) do {				\
 	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
@@ -67,6 +68,8 @@  static const int bpf2a64[] = {
 	/* temporary register for blinding constants */
 	[BPF_REG_AX] = A64_R(9),
 	[FP_BOTTOM] = A64_R(27),
+	/* callee saved register for kern_vm_start address */
+	[PROBE_MEM32_BASE] = A64_R(28),
 };
 
 struct jit_ctx {
@@ -295,7 +298,7 @@  static bool is_lsi_offset(int offset, int scale)
 #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)
 
 static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
-			  bool is_exception_cb)
+			  bool is_exception_cb, u64 arena_vm_start)
 {
 	const struct bpf_prog *prog = ctx->prog;
 	const bool is_main_prog = !bpf_is_subprog(prog);
@@ -306,6 +309,7 @@  static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
 	const u8 fp = bpf2a64[BPF_REG_FP];
 	const u8 tcc = bpf2a64[TCALL_CNT];
 	const u8 fpb = bpf2a64[FP_BOTTOM];
+	const u8 pb = bpf2a64[PROBE_MEM32_BASE];
 	const int idx0 = ctx->idx;
 	int cur_offset;
 
@@ -411,6 +415,10 @@  static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
 
 	/* Set up function call stack */
 	emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
+
+	if (arena_vm_start)
+		emit_a64_mov_i64(pb, arena_vm_start, ctx);
+
 	return 0;
 }
 
@@ -738,6 +746,7 @@  static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb)
 
 #define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
 #define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
+#define DONT_CLEAR 32
 
 bool ex_handler_bpf(const struct exception_table_entry *ex,
 		    struct pt_regs *regs)
@@ -745,7 +754,8 @@  bool ex_handler_bpf(const struct exception_table_entry *ex,
 	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
 	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
 
-	regs->regs[dst_reg] = 0;
+	if (dst_reg != DONT_CLEAR)
+		regs->regs[dst_reg] = 0;
 	regs->pc = (unsigned long)&ex->fixup - offset;
 	return true;
 }
@@ -765,7 +775,8 @@  static int add_exception_handler(const struct bpf_insn *insn,
 		return 0;
 
 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
-		BPF_MODE(insn->code) != BPF_PROBE_MEMSX)
+		BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
+			BPF_MODE(insn->code) != BPF_PROBE_MEM32)
 		return 0;
 
 	if (!ctx->prog->aux->extable ||
@@ -810,6 +821,9 @@  static int add_exception_handler(const struct bpf_insn *insn,
 
 	ex->insn = ins_offset;
 
+	if (BPF_CLASS(insn->code) != BPF_LDX)
+		dst_reg = DONT_CLEAR;
+
 	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
 		    FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
 
@@ -829,12 +843,13 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		      bool extra_pass)
 {
 	const u8 code = insn->code;
-	const u8 dst = bpf2a64[insn->dst_reg];
-	const u8 src = bpf2a64[insn->src_reg];
+	u8 dst = bpf2a64[insn->dst_reg];
+	u8 src = bpf2a64[insn->src_reg];
 	const u8 tmp = bpf2a64[TMP_REG_1];
 	const u8 tmp2 = bpf2a64[TMP_REG_2];
 	const u8 fp = bpf2a64[BPF_REG_FP];
 	const u8 fpb = bpf2a64[FP_BOTTOM];
+	const u8 pb = bpf2a64[PROBE_MEM32_BASE];
 	const s16 off = insn->off;
 	const s32 imm = insn->imm;
 	const int i = insn - ctx->prog->insnsi;
@@ -1237,7 +1252,15 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
-		if (ctx->fpb_offset > 0 && src == fp) {
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+			emit(A64_ADD(1, tmp2, src, pb), ctx);
+			src = tmp2;
+		}
+		if (ctx->fpb_offset > 0 && src == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) {
 			src_adj = fpb;
 			off_adj = off + ctx->fpb_offset;
 		} else {
@@ -1322,7 +1345,15 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	case BPF_ST | BPF_MEM | BPF_H:
 	case BPF_ST | BPF_MEM | BPF_B:
 	case BPF_ST | BPF_MEM | BPF_DW:
-		if (ctx->fpb_offset > 0 && dst == fp) {
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+			emit(A64_ADD(1, tmp2, dst, pb), ctx);
+			dst = tmp2;
+		}
+		if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) {
 			dst_adj = fpb;
 			off_adj = off + ctx->fpb_offset;
 		} else {
@@ -1365,6 +1396,10 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			}
 			break;
 		}
+
+		ret = add_exception_handler(insn, ctx, dst);
+		if (ret)
+			return ret;
 		break;
 
 	/* STX: *(size *)(dst + off) = src */
@@ -1372,7 +1407,15 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	case BPF_STX | BPF_MEM | BPF_H:
 	case BPF_STX | BPF_MEM | BPF_B:
 	case BPF_STX | BPF_MEM | BPF_DW:
-		if (ctx->fpb_offset > 0 && dst == fp) {
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+			emit(A64_ADD(1, tmp2, dst, pb), ctx);
+			dst = tmp2;
+		}
+		if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) {
 			dst_adj = fpb;
 			off_adj = off + ctx->fpb_offset;
 		} else {
@@ -1413,6 +1456,10 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			}
 			break;
 		}
+
+		ret = add_exception_handler(insn, ctx, dst);
+		if (ret)
+			return ret;
 		break;
 
 	case BPF_STX | BPF_ATOMIC | BPF_W:
@@ -1594,6 +1641,7 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	bool tmp_blinded = false;
 	bool extra_pass = false;
 	struct jit_ctx ctx;
+	u64 arena_vm_start;
 	u8 *image_ptr;
 	u8 *ro_image_ptr;
 
@@ -1611,6 +1659,7 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		prog = tmp;
 	}
 
+	arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
 	jit_data = prog->aux->jit_data;
 	if (!jit_data) {
 		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
@@ -1648,7 +1697,8 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	 * BPF line info needs ctx->offset[i] to be the offset of
 	 * instruction[i] in jited image, so build prologue first.
 	 */
-	if (build_prologue(&ctx, was_classic, prog->aux->exception_cb)) {
+	if (build_prologue(&ctx, was_classic, prog->aux->exception_cb,
+			   arena_vm_start)) {
 		prog = orig_prog;
 		goto out_off;
 	}
@@ -1696,7 +1746,7 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	ctx.idx = 0;
 	ctx.exentry_idx = 0;
 
-	build_prologue(&ctx, was_classic, prog->aux->exception_cb);
+	build_prologue(&ctx, was_classic, prog->aux->exception_cb, arena_vm_start);
 
 	if (build_body(&ctx, extra_pass)) {
 		prog = orig_prog;