[bpf-next,v5,1/3] arm64: patching: Add aarch64_insn_copy()

Message ID: 20230908144320.2474-2-puranjay12@gmail.com
State: Changes Requested
Delegated to: BPF
Series: bpf, arm64: use BPF prog pack allocator in BPF JIT

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 9 this patch: 9
netdev/cc_maintainers warning 1 maintainers not CCed: will@kernel.org
netdev/build_clang success Errors and warnings before: 9 this patch: 9
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 9 this patch: 9
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 54 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-1 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-6 fail Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 fail Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-28 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-10 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on s390x with gcc

Commit Message

Puranjay Mohan Sept. 8, 2023, 2:43 p.m. UTC
This will be used by the BPF JIT compiler to dump the JITed binary to an RX
huge page, and thus allow multiple BPF programs to share a huge (2MB) page.

The bpf_prog_pack allocator that implements the above feature allocates
an RX/RW buffer pair. The JITed code is written to the RW buffer, and this
function is then used to copy the code from the RW buffer to the RX buffer.

Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
Acked-by: Song Liu <song@kernel.org>
---
 arch/arm64/include/asm/patching.h |  1 +
 arch/arm64/kernel/patching.c      | 41 +++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)
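
For context, a minimal sketch (not part of this series) of how a JIT caller
might publish code through this function, assuming a prog-pack-style RW/RX
buffer pair; jit_publish() and its parameters are illustrative placeholders:

	/* Hypothetical caller: the JITed instructions have already been
	 * emitted into rw_image; copy them into the executable rx_image. */
	static void *jit_publish(void *rx_image, void *rw_image, size_t len)
	{
		if (!aarch64_insn_copy(rx_image, rw_image, len))
			return NULL;	/* copy failed; caller frees its pack slot */
		return rx_image;	/* executable copy of the program */
	}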

Comments

Xu Kuohai Sept. 9, 2023, 9:04 a.m. UTC | #1
On 9/8/2023 10:43 PM, Puranjay Mohan wrote:
> This will be used by the BPF JIT compiler to dump the JITed binary to an
> RX huge page, and thus allow multiple BPF programs to share a huge (2MB)
> page.
> 
> The bpf_prog_pack allocator that implements the above feature allocates
> an RX/RW buffer pair. The JITed code is written to the RW buffer, and this
> function is then used to copy the code from the RW buffer to the RX buffer.
> 
> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
> Acked-by: Song Liu <song@kernel.org>
> ---
>   arch/arm64/include/asm/patching.h |  1 +
>   arch/arm64/kernel/patching.c      | 41 +++++++++++++++++++++++++++++++
>   2 files changed, 42 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
> index 68908b82b168..f78a0409cbdb 100644
> --- a/arch/arm64/include/asm/patching.h
> +++ b/arch/arm64/include/asm/patching.h
> @@ -8,6 +8,7 @@ int aarch64_insn_read(void *addr, u32 *insnp);
>   int aarch64_insn_write(void *addr, u32 insn);
>   
>   int aarch64_insn_write_literal_u64(void *addr, u64 val);
> +void *aarch64_insn_copy(void *dst, const void *src, size_t len);
>   
>   int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
>   int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
> diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
> index b4835f6d594b..243d6ae8d2d8 100644
> --- a/arch/arm64/kernel/patching.c
> +++ b/arch/arm64/kernel/patching.c
> @@ -105,6 +105,47 @@ noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val)
>   	return ret;
>   }
>   
> +/**
> + * aarch64_insn_copy - Copy instructions into (an unused part of) RX memory
> + * @dst: address to modify
> + * @src: source of the copy
> + * @len: length to copy
> + *
> + * Useful for JITs to dump new code blocks into unused regions of RX memory.
> + */
> +noinstr void *aarch64_insn_copy(void *dst, const void *src, size_t len)
> +{
> +	unsigned long flags;
> +	size_t patched = 0;
> +	size_t size;
> +	void *waddr;
> +	void *ptr;
> +	int ret;
> +

Should this check whether the input address and length are aligned to the instruction size?

> +	raw_spin_lock_irqsave(&patch_lock, flags);
> +
> +	while (patched < len) {
> +		ptr = dst + patched;
> +		size = min_t(size_t, PAGE_SIZE - offset_in_page(ptr),
> +			     len - patched);
> +
> +		waddr = patch_map(ptr, FIX_TEXT_POKE0);
> +		ret = copy_to_kernel_nofault(waddr, src + patched, size);
> +		patch_unmap(FIX_TEXT_POKE0);
> +
> +		if (ret < 0) {
> +			raw_spin_unlock_irqrestore(&patch_lock, flags);
> +			return NULL;
> +		}
> +		patched += size;
> +	}
> +	raw_spin_unlock_irqrestore(&patch_lock, flags);
> +
> +	caches_clean_inval_pou((uintptr_t)dst, (uintptr_t)dst + len);
> +

It seems flush_icache_range() or something similar should be called here to
ensure the other CPUs' pipelines are flushed; otherwise, the old instructions
at the dst address might be executed on other CPUs after the copy completes,
which is not expected.

> +	return dst;
> +}
> +
>   int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
>   {
>   	u32 *tp = addr;
Puranjay Mohan Sept. 21, 2023, 2:33 p.m. UTC | #2
Xu Kuohai <xukuohai@huaweicloud.com> writes:

> On 9/8/2023 10:43 PM, Puranjay Mohan wrote:
>> This will be used by the BPF JIT compiler to dump the JITed binary to an
>> RX huge page, and thus allow multiple BPF programs to share a huge (2MB)
>> page.
>> 
>> The bpf_prog_pack allocator that implements the above feature allocates
>> an RX/RW buffer pair. The JITed code is written to the RW buffer, and this
>> function is then used to copy the code from the RW buffer to the RX buffer.
>> 
>> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
>> Acked-by: Song Liu <song@kernel.org>
>> ---
>>   arch/arm64/include/asm/patching.h |  1 +
>>   arch/arm64/kernel/patching.c      | 41 +++++++++++++++++++++++++++++++
>>   2 files changed, 42 insertions(+)
>> 
>> diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
>> index 68908b82b168..f78a0409cbdb 100644
>> --- a/arch/arm64/include/asm/patching.h
>> +++ b/arch/arm64/include/asm/patching.h
>> @@ -8,6 +8,7 @@ int aarch64_insn_read(void *addr, u32 *insnp);
>>   int aarch64_insn_write(void *addr, u32 insn);
>>   
>>   int aarch64_insn_write_literal_u64(void *addr, u64 val);
>> +void *aarch64_insn_copy(void *dst, const void *src, size_t len);
>>   
>>   int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
>>   int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
>> diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
>> index b4835f6d594b..243d6ae8d2d8 100644
>> --- a/arch/arm64/kernel/patching.c
>> +++ b/arch/arm64/kernel/patching.c
>> @@ -105,6 +105,47 @@ noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val)
>>   	return ret;
>>   }
>>   
>> +/**
>> + * aarch64_insn_copy - Copy instructions into (an unused part of) RX memory
>> + * @dst: address to modify
>> + * @src: source of the copy
>> + * @len: length to copy
>> + *
>> + * Useful for JITs to dump new code blocks into unused regions of RX memory.
>> + */
>> +noinstr void *aarch64_insn_copy(void *dst, const void *src, size_t len)
>> +{
>> +	unsigned long flags;
>> +	size_t patched = 0;
>> +	size_t size;
>> +	void *waddr;
>> +	void *ptr;
>> +	int ret;
>> +
>
> Should this check whether the input address and length are aligned to the instruction size?

Will add a check that dst is aligned to the instruction size and that len is
a multiple of the instruction size.
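
For illustration, a sketch of such a check, using AARCH64_INSN_SIZE (the
4-byte arm64 instruction size); exact placement and error handling may
differ in the next version:

	/* Reject misaligned destinations and lengths that are not a
	 * whole number of instructions. */
	if (!IS_ALIGNED((unsigned long)dst, AARCH64_INSN_SIZE) ||
	    !IS_ALIGNED(len, AARCH64_INSN_SIZE))
		return NULL;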

>
>> +	raw_spin_lock_irqsave(&patch_lock, flags);
>> +
>> +	while (patched < len) {
>> +		ptr = dst + patched;
>> +		size = min_t(size_t, PAGE_SIZE - offset_in_page(ptr),
>> +			     len - patched);
>> +
>> +		waddr = patch_map(ptr, FIX_TEXT_POKE0);
>> +		ret = copy_to_kernel_nofault(waddr, src + patched, size);
>> +		patch_unmap(FIX_TEXT_POKE0);
>> +
>> +		if (ret < 0) {
>> +			raw_spin_unlock_irqrestore(&patch_lock, flags);
>> +			return NULL;
>> +		}
>> +		patched += size;
>> +	}
>> +	raw_spin_unlock_irqrestore(&patch_lock, flags);
>> +
>> +	caches_clean_inval_pou((uintptr_t)dst, (uintptr_t)dst + len);
>> +
>
> It seems flush_icache_range() or something similar should be called here to
> ensure the other CPUs' pipelines are flushed; otherwise, the old instructions
> at the dst address might be executed on other CPUs after the copy completes,
> which is not expected.

Sure, I will use flush_icache_range() in place of caches_clean_inval_pou()
in the next version.
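
For illustration, the substitution would look like this; on arm64,
flush_icache_range() performs the same clean/invalidate to the PoU and
additionally calls kick_all_cpus_sync() to IPI all online CPUs so that they
refetch the new instructions:

	/* Clean/invalidate to the PoU, then IPI all CPUs so no stale
	 * copy of the old instructions at dst remains in any pipeline. */
	flush_icache_range((uintptr_t)dst, (uintptr_t)dst + len);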

>
>> +	return dst;
>> +}
>> +
>>   int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
>>   {
>>   	u32 *tp = addr;
Mark Rutland Nov. 2, 2023, 4:19 p.m. UTC | #3
Hi Puranjay,

On Fri, Sep 08, 2023 at 02:43:18PM +0000, Puranjay Mohan wrote:
> This will be used by the BPF JIT compiler to dump the JITed binary to an
> RX huge page, and thus allow multiple BPF programs to share a huge (2MB)
> page.
> 
> The bpf_prog_pack allocator that implements the above feature allocates
> an RX/RW buffer pair. The JITed code is written to the RW buffer, and this
> function is then used to copy the code from the RW buffer to the RX buffer.
> 
> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
> Acked-by: Song Liu <song@kernel.org>
> ---
>  arch/arm64/include/asm/patching.h |  1 +
>  arch/arm64/kernel/patching.c      | 41 +++++++++++++++++++++++++++++++
>  2 files changed, 42 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
> index 68908b82b168..f78a0409cbdb 100644
> --- a/arch/arm64/include/asm/patching.h
> +++ b/arch/arm64/include/asm/patching.h
> @@ -8,6 +8,7 @@ int aarch64_insn_read(void *addr, u32 *insnp);
>  int aarch64_insn_write(void *addr, u32 insn);
>  
>  int aarch64_insn_write_literal_u64(void *addr, u64 val);
> +void *aarch64_insn_copy(void *dst, const void *src, size_t len);
>  
>  int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
>  int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
> diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
> index b4835f6d594b..243d6ae8d2d8 100644
> --- a/arch/arm64/kernel/patching.c
> +++ b/arch/arm64/kernel/patching.c
> @@ -105,6 +105,47 @@ noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val)
>  	return ret;
>  }
>  
> +/**
> + * aarch64_insn_copy - Copy instructions into (an unused part of) RX memory
> + * @dst: address to modify
> + * @src: source of the copy
> + * @len: length to copy
> + *
> + * Useful for JITs to dump new code blocks into unused regions of RX memory.
> + */
> +noinstr void *aarch64_insn_copy(void *dst, const void *src, size_t len)
> +{
> +	unsigned long flags;
> +	size_t patched = 0;
> +	size_t size;
> +	void *waddr;
> +	void *ptr;
> +	int ret;
> +
> +	raw_spin_lock_irqsave(&patch_lock, flags);
> +
> +	while (patched < len) {
> +		ptr = dst + patched;
> +		size = min_t(size_t, PAGE_SIZE - offset_in_page(ptr),
> +			     len - patched);
> +
> +		waddr = patch_map(ptr, FIX_TEXT_POKE0);
> +		ret = copy_to_kernel_nofault(waddr, src + patched, size);
> +		patch_unmap(FIX_TEXT_POKE0);
> +
> +		if (ret < 0) {
> +			raw_spin_unlock_irqrestore(&patch_lock, flags);
> +			return NULL;
> +		}
> +		patched += size;
> +	}
> +	raw_spin_unlock_irqrestore(&patch_lock, flags);
> +
> +	caches_clean_inval_pou((uintptr_t)dst, (uintptr_t)dst + len);

As Xu mentioned, either this needs to use flush_icache_range() to IPI all CPUs
in the system, or we need to make it the caller's responsibility to do that.

Otherwise, I think this is functionally ok, but I'm not certain that it's good
for BPF to be using the FIX_TEXT_POKE0 slot as that will serialize all BPF
loading, ftrace, kprobes, etc. against one another. Do we ever expect to load
multiple BPF programs in parallel, or is that serialized at a higher level?

Thanks,
Mark.
Alexei Starovoitov Nov. 2, 2023, 5:41 p.m. UTC | #4
On Thu, Nov 2, 2023 at 9:19 AM Mark Rutland <mark.rutland@arm.com> wrote:
>
> Hi Puranjay,
>
> On Fri, Sep 08, 2023 at 02:43:18PM +0000, Puranjay Mohan wrote:
> > This will be used by the BPF JIT compiler to dump the JITed binary to an
> > RX huge page, and thus allow multiple BPF programs to share a huge (2MB)
> > page.
> >
> > The bpf_prog_pack allocator that implements the above feature allocates
> > an RX/RW buffer pair. The JITed code is written to the RW buffer, and this
> > function is then used to copy the code from the RW buffer to the RX buffer.
> >
> > Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
> > Acked-by: Song Liu <song@kernel.org>
> > ---
> >  arch/arm64/include/asm/patching.h |  1 +
> >  arch/arm64/kernel/patching.c      | 41 +++++++++++++++++++++++++++++++
> >  2 files changed, 42 insertions(+)
> >
> > diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
> > index 68908b82b168..f78a0409cbdb 100644
> > --- a/arch/arm64/include/asm/patching.h
> > +++ b/arch/arm64/include/asm/patching.h
> > @@ -8,6 +8,7 @@ int aarch64_insn_read(void *addr, u32 *insnp);
> >  int aarch64_insn_write(void *addr, u32 insn);
> >
> >  int aarch64_insn_write_literal_u64(void *addr, u64 val);
> > +void *aarch64_insn_copy(void *dst, const void *src, size_t len);
> >
> >  int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
> >  int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
> > diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
> > index b4835f6d594b..243d6ae8d2d8 100644
> > --- a/arch/arm64/kernel/patching.c
> > +++ b/arch/arm64/kernel/patching.c
> > @@ -105,6 +105,47 @@ noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val)
> >       return ret;
> >  }
> >
> > +/**
> > + * aarch64_insn_copy - Copy instructions into (an unused part of) RX memory
> > + * @dst: address to modify
> > + * @src: source of the copy
> > + * @len: length to copy
> > + *
> > + * Useful for JITs to dump new code blocks into unused regions of RX memory.
> > + */
> > +noinstr void *aarch64_insn_copy(void *dst, const void *src, size_t len)
> > +{
> > +     unsigned long flags;
> > +     size_t patched = 0;
> > +     size_t size;
> > +     void *waddr;
> > +     void *ptr;
> > +     int ret;
> > +
> > +     raw_spin_lock_irqsave(&patch_lock, flags);
> > +
> > +     while (patched < len) {
> > +             ptr = dst + patched;
> > +             size = min_t(size_t, PAGE_SIZE - offset_in_page(ptr),
> > +                          len - patched);
> > +
> > +             waddr = patch_map(ptr, FIX_TEXT_POKE0);
> > +             ret = copy_to_kernel_nofault(waddr, src + patched, size);
> > +             patch_unmap(FIX_TEXT_POKE0);
> > +
> > +             if (ret < 0) {
> > +                     raw_spin_unlock_irqrestore(&patch_lock, flags);
> > +                     return NULL;
> > +             }
> > +             patched += size;
> > +     }
> > +     raw_spin_unlock_irqrestore(&patch_lock, flags);
> > +
> > +     caches_clean_inval_pou((uintptr_t)dst, (uintptr_t)dst + len);
>
> As Xu mentioned, either this needs to use flush_icache_range() to IPI all CPUs
> in the system, or we need to make it the caller's responsibility to do that.
>
> Otherwise, I think this is functionally ok, but I'm not certain that it's good
> for BPF to be using the FIX_TEXT_POKE0 slot as that will serialize all BPF
> loading, ftrace, kprobes, etc against one another. Do we ever expect to load
> multiple BPF programs in parallel, or is that serialized at a higher level?

BPF loading is pretty much serialized by the verifier.
It's a very slow operation.

Patch

diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
index 68908b82b168..f78a0409cbdb 100644
--- a/arch/arm64/include/asm/patching.h
+++ b/arch/arm64/include/asm/patching.h
@@ -8,6 +8,7 @@  int aarch64_insn_read(void *addr, u32 *insnp);
 int aarch64_insn_write(void *addr, u32 insn);
 
 int aarch64_insn_write_literal_u64(void *addr, u64 val);
+void *aarch64_insn_copy(void *dst, const void *src, size_t len);
 
 int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
 int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index b4835f6d594b..243d6ae8d2d8 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -105,6 +105,47 @@  noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val)
 	return ret;
 }
 
+/**
+ * aarch64_insn_copy - Copy instructions into (an unused part of) RX memory
+ * @dst: address to modify
+ * @src: source of the copy
+ * @len: length to copy
+ *
+ * Useful for JITs to dump new code blocks into unused regions of RX memory.
+ */
+noinstr void *aarch64_insn_copy(void *dst, const void *src, size_t len)
+{
+	unsigned long flags;
+	size_t patched = 0;
+	size_t size;
+	void *waddr;
+	void *ptr;
+	int ret;
+
+	raw_spin_lock_irqsave(&patch_lock, flags);
+
+	while (patched < len) {
+		ptr = dst + patched;
+		size = min_t(size_t, PAGE_SIZE - offset_in_page(ptr),
+			     len - patched);
+
+		waddr = patch_map(ptr, FIX_TEXT_POKE0);
+		ret = copy_to_kernel_nofault(waddr, src + patched, size);
+		patch_unmap(FIX_TEXT_POKE0);
+
+		if (ret < 0) {
+			raw_spin_unlock_irqrestore(&patch_lock, flags);
+			return NULL;
+		}
+		patched += size;
+	}
+	raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+	caches_clean_inval_pou((uintptr_t)dst, (uintptr_t)dst + len);
+
+	return dst;
+}
+
 int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
 {
 	u32 *tp = addr;