diff mbox series

[2/2] MIPS: ebpf jit: Implement R4000 workarounds

Message ID 20230222161222.11879-3-jiaxun.yang@flygoat.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series MIPS: Implement two workarounds for BPF JIT | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 fail Logs for test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-17

Commit Message

Jiaxun Yang Feb. 22, 2023, 4:12 p.m. UTC
For R4000 erratas around multiplication and division instructions,
as our use of those instructions are always followed by mflo/mfhi
instructions, the only issue we need care is

"MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0" Errata 28:
"A double-word or a variable shift may give an incorrect result if
executed while an integer multiplication is in progress."

We just emit a mfhi $0 to ensure the operation is completed after
every multiplication instruction accorading to workaround suggestion
in the document.

Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
---
 arch/mips/Kconfig              | 4 +---
 arch/mips/net/bpf_jit_comp32.c | 4 ++++
 arch/mips/net/bpf_jit_comp64.c | 3 +++
 3 files changed, 8 insertions(+), 3 deletions(-)

Comments

Johan Almbladh Feb. 23, 2023, 10:22 a.m. UTC | #1
On Wed, Feb 22, 2023 at 5:12 PM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
>
> For R4000 erratas around multiplication and division instructions,
> as our use of those instructions are always followed by mflo/mfhi
> instructions, the only issue we need care is
>
> "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0" Errata 28:
> "A double-word or a variable shift may give an incorrect result if
> executed while an integer multiplication is in progress."
>
> We just emit a mfhi $0 to ensure the operation is completed after
> every multiplication instruction accorading to workaround suggestion
> in the document.
>
> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> ---
>  arch/mips/Kconfig              | 4 +---
>  arch/mips/net/bpf_jit_comp32.c | 4 ++++
>  arch/mips/net/bpf_jit_comp64.c | 3 +++
>  3 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index df0910e3895c..5ea07c833c5b 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -63,9 +63,7 @@ config MIPS
>         select HAVE_DEBUG_STACKOVERFLOW
>         select HAVE_DMA_CONTIGUOUS
>         select HAVE_DYNAMIC_FTRACE
> -       select HAVE_EBPF_JIT if !CPU_MICROMIPS && \
> -                               !CPU_R4000_WORKAROUNDS && \
> -                               !CPU_R4400_WORKAROUNDS

Is the R4400 errata also covered by this workaround?

> +       select HAVE_EBPF_JIT if !CPU_MICROMIPS
>         select HAVE_EXIT_THREAD
>         select HAVE_FAST_GUP
>         select HAVE_FTRACE_MCOUNT_RECORD
> diff --git a/arch/mips/net/bpf_jit_comp32.c b/arch/mips/net/bpf_jit_comp32.c
> index ace5db3fbd17..fee334544d2f 100644
> --- a/arch/mips/net/bpf_jit_comp32.c
> +++ b/arch/mips/net/bpf_jit_comp32.c
> @@ -446,6 +446,9 @@ static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
>                 } else {
>                         emit(ctx, multu, hi(dst), src);
>                         emit(ctx, mflo, hi(dst));
> +                       /* Ensure multiplication is completed */
> +                       if (IS_ENABLED(CONFIG_CPU_R4000_WORKAROUNDS))
> +                               emit(ctx, mfhi, MIPS_R_ZERO);
>                 }
>
>                 /* hi(dst) = hi(dst) - lo(dst) */
> @@ -504,6 +507,7 @@ static void emit_mul_r64(struct jit_context *ctx,
>         } else {
>                 emit(ctx, multu, lo(dst), lo(src));
>                 emit(ctx, mflo, lo(dst));
> +               /* No need for workaround because we have this mfhi */
>                 emit(ctx, mfhi, tmp);
>         }

R4000 is a 64-bit CPU, so the 32-bit JIT implementation will not be
used. From the Makefile:

ifeq ($(CONFIG_32BIT),y)
        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
else
        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
endif

>
> diff --git a/arch/mips/net/bpf_jit_comp64.c b/arch/mips/net/bpf_jit_comp64.c
> index 0e7c1bdcf914..5f5a93f997bc 100644
> --- a/arch/mips/net/bpf_jit_comp64.c
> +++ b/arch/mips/net/bpf_jit_comp64.c
> @@ -228,6 +228,9 @@ static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op)
>                 } else {
>                         emit(ctx, dmultu, dst, src);
>                         emit(ctx, mflo, dst);
> +                       /* Ensure multiplication is completed */
> +                       if (IS_ENABLED(CONFIG_CPU_R4000_WORKAROUNDS))
> +                               emit(ctx, mfhi, MIPS_R_ZERO);
>                 }
>                 break;
>         /* dst = dst / src */
> --
> 2.37.1 (Apple Git-137.1)
>
Jiaxun Yang Feb. 23, 2023, 10:31 a.m. UTC | #2
> 2023年2月23日 10:22,Johan Almbladh <johan.almbladh@anyfinetworks.com> 写道:
> 
> On Wed, Feb 22, 2023 at 5:12 PM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
>> 
>> For R4000 erratas around multiplication and division instructions,
>> as our use of those instructions are always followed by mflo/mfhi
>> instructions, the only issue we need care is
>> 
>> "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0" Errata 28:
>> "A double-word or a variable shift may give an incorrect result if
>> executed while an integer multiplication is in progress."
>> 
>> We just emit a mfhi $0 to ensure the operation is completed after
>> every multiplication instruction accorading to workaround suggestion
>> in the document.
>> 
>> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
>> ---
>> arch/mips/Kconfig              | 4 +---
>> arch/mips/net/bpf_jit_comp32.c | 4 ++++
>> arch/mips/net/bpf_jit_comp64.c | 3 +++
>> 3 files changed, 8 insertions(+), 3 deletions(-)
>> 
>> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
>> index df0910e3895c..5ea07c833c5b 100644
>> --- a/arch/mips/Kconfig
>> +++ b/arch/mips/Kconfig
>> @@ -63,9 +63,7 @@ config MIPS
>>        select HAVE_DEBUG_STACKOVERFLOW
>>        select HAVE_DMA_CONTIGUOUS
>>        select HAVE_DYNAMIC_FTRACE
>> -       select HAVE_EBPF_JIT if !CPU_MICROMIPS && \
>> -                               !CPU_R4000_WORKAROUNDS && \
>> -                               !CPU_R4400_WORKAROUNDS
> 
> Is the R4400 errata also covered by this workaround?

Yes, R4400 errata is basically a reduced version of R4000 one.
They managed to fix some parts of the issue but not all.

> 
>> +       select HAVE_EBPF_JIT if !CPU_MICROMIPS
>>        select HAVE_EXIT_THREAD
>>        select HAVE_FAST_GUP
>>        select HAVE_FTRACE_MCOUNT_RECORD
>> diff --git a/arch/mips/net/bpf_jit_comp32.c b/arch/mips/net/bpf_jit_comp32.c
>> index ace5db3fbd17..fee334544d2f 100644
>> --- a/arch/mips/net/bpf_jit_comp32.c
>> +++ b/arch/mips/net/bpf_jit_comp32.c
>> @@ -446,6 +446,9 @@ static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
>>                } else {
>>                        emit(ctx, multu, hi(dst), src);
>>                        emit(ctx, mflo, hi(dst));
>> +                       /* Ensure multiplication is completed */
>> +                       if (IS_ENABLED(CONFIG_CPU_R4000_WORKAROUNDS))
>> +                               emit(ctx, mfhi, MIPS_R_ZERO);
>>                }
>> 
>>                /* hi(dst) = hi(dst) - lo(dst) */
>> @@ -504,6 +507,7 @@ static void emit_mul_r64(struct jit_context *ctx,
>>        } else {
>>                emit(ctx, multu, lo(dst), lo(src));
>>                emit(ctx, mflo, lo(dst));
>> +               /* No need for workaround because we have this mfhi */
>>                emit(ctx, mfhi, tmp);
>>        }
> 
> R4000 is a 64-bit CPU, so the 32-bit JIT implementation will not be
> used. From the Makefile:
> 
> ifeq ($(CONFIG_32BIT),y)
>        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
> else
>        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
> endif

It’s common practice to run 32-bit kernel on R4000 based systems to save some memory :-)

Thanks.
- Jiaxun

> 
>> 
>> diff --git a/arch/mips/net/bpf_jit_comp64.c b/arch/mips/net/bpf_jit_comp64.c
>> index 0e7c1bdcf914..5f5a93f997bc 100644
>> --- a/arch/mips/net/bpf_jit_comp64.c
>> +++ b/arch/mips/net/bpf_jit_comp64.c
>> @@ -228,6 +228,9 @@ static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op)
>>                } else {
>>                        emit(ctx, dmultu, dst, src);
>>                        emit(ctx, mflo, dst);
>> +                       /* Ensure multiplication is completed */
>> +                       if (IS_ENABLED(CONFIG_CPU_R4000_WORKAROUNDS))
>> +                               emit(ctx, mfhi, MIPS_R_ZERO);
>>                }
>>                break;
>>        /* dst = dst / src */
>> --
>> 2.37.1 (Apple Git-137.1)
Philippe Mathieu-Daudé Feb. 26, 2023, 10:09 p.m. UTC | #3
On 22/2/23 17:12, Jiaxun Yang wrote:
> For R4000 erratas around multiplication and division instructions,
> as our use of those instructions are always followed by mflo/mfhi
> instructions, the only issue we need care is
> 
> "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0" Errata 28:
> "A double-word or a variable shift may give an incorrect result if
> executed while an integer multiplication is in progress."
> 
> We just emit a mfhi $0 to ensure the operation is completed after
> every multiplication instruction accorading to workaround suggestion

Typo "according"

> in the document.
> 
> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> ---
>   arch/mips/Kconfig              | 4 +---
>   arch/mips/net/bpf_jit_comp32.c | 4 ++++
>   arch/mips/net/bpf_jit_comp64.c | 3 +++
>   3 files changed, 8 insertions(+), 3 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Johan Almbladh Feb. 27, 2023, 12:19 p.m. UTC | #4
On Thu, Feb 23, 2023 at 11:32 AM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
> >> --- a/arch/mips/Kconfig
> >> +++ b/arch/mips/Kconfig
> >> @@ -63,9 +63,7 @@ config MIPS
> >>        select HAVE_DEBUG_STACKOVERFLOW
> >>        select HAVE_DMA_CONTIGUOUS
> >>        select HAVE_DYNAMIC_FTRACE
> >> -       select HAVE_EBPF_JIT if !CPU_MICROMIPS && \
> >> -                               !CPU_R4000_WORKAROUNDS && \
> >> -                               !CPU_R4400_WORKAROUNDS
> >
> > Is the R4400 errata also covered by this workaround?
>
> Yes, R4400 errata is basically a reduced version of R4000 one.
> They managed to fix some parts of the issue but not all.

Ok.

> >> --- a/arch/mips/net/bpf_jit_comp32.c
> >> +++ b/arch/mips/net/bpf_jit_comp32.c
> >> @@ -446,6 +446,9 @@ static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
> >>                } else {
> >>                        emit(ctx, multu, hi(dst), src);
> >>                        emit(ctx, mflo, hi(dst));
> >> +                       /* Ensure multiplication is completed */
> >> +                       if (IS_ENABLED(CONFIG_CPU_R4000_WORKAROUNDS))
> >> +                               emit(ctx, mfhi, MIPS_R_ZERO);
> >>                }
> >>
> >>                /* hi(dst) = hi(dst) - lo(dst) */
> >> @@ -504,6 +507,7 @@ static void emit_mul_r64(struct jit_context *ctx,
> >>        } else {
> >>                emit(ctx, multu, lo(dst), lo(src));
> >>                emit(ctx, mflo, lo(dst));
> >> +               /* No need for workaround because we have this mfhi */

For context, please specify which workaround this comment refers to:
"workaround" -> "R4000 workaround".

> > R4000 is a 64-bit CPU, so the 32-bit JIT implementation will not be
> > used. From the Makefile:
> >
> > ifeq ($(CONFIG_32BIT),y)
> >        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
> > else
> >        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
> > endif
>
> It’s common practice to run 32-bit kernel on R4000 based systems to save some memory :-)

Ok, I understand.

Looks good! I have run the test_bpf.ko test suite on MIPS and MIPS64
in QEMU with and without the workarounds enabled.

With above comment fix:
Acked-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Maciej W. Rozycki Feb. 27, 2023, 3:18 p.m. UTC | #5
On Mon, 27 Feb 2023, Johan Almbladh wrote:

> > > R4000 is a 64-bit CPU, so the 32-bit JIT implementation will not be
> > > used. From the Makefile:
> > >
> > > ifeq ($(CONFIG_32BIT),y)
> > >        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
> > > else
> > >        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
> > > endif
> >
> > It’s common practice to run 32-bit kernel on R4000 based systems to save some memory :-)
> 
> Ok, I understand.

 Likewise:

	select CPU_R4000_WORKAROUNDS if 64BIT
	select CPU_R4400_WORKAROUNDS if 64BIT

This only applies to 64-bit operations, which are not used in 32-bit code 
(one reason why these early silicon revisions were originally used with 
32-bit software only).

  Maciej
Jiaxun Yang Feb. 27, 2023, 3:47 p.m. UTC | #6
在2023年2月27日二月 下午3:18,Maciej W. Rozycki写道:
> On Mon, 27 Feb 2023, Johan Almbladh wrote:
>
>> > > R4000 is a 64-bit CPU, so the 32-bit JIT implementation will not be
>> > > used. From the Makefile:
>> > >
>> > > ifeq ($(CONFIG_32BIT),y)
>> > >        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
>> > > else
>> > >        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
>> > > endif
>> >
>> > It’s common practice to run 32-bit kernel on R4000 based systems to save some memory :-)
>> 
>> Ok, I understand.
>
>  Likewise:
>
> 	select CPU_R4000_WORKAROUNDS if 64BIT
> 	select CPU_R4400_WORKAROUNDS if 64BIT
>
> This only applies to 64-bit operations, which are not used in 32-bit code 
> (one reason why these early silicon revisions were originally used with 
> 32-bit software only).

Thanks for the info.
Will drop 32bit part from both patch.

>
>   Maciej
diff mbox series

Patch

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index df0910e3895c..5ea07c833c5b 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -63,9 +63,7 @@  config MIPS
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
-	select HAVE_EBPF_JIT if !CPU_MICROMIPS && \
-				!CPU_R4000_WORKAROUNDS && \
-				!CPU_R4400_WORKAROUNDS
+	select HAVE_EBPF_JIT if !CPU_MICROMIPS
 	select HAVE_EXIT_THREAD
 	select HAVE_FAST_GUP
 	select HAVE_FTRACE_MCOUNT_RECORD
diff --git a/arch/mips/net/bpf_jit_comp32.c b/arch/mips/net/bpf_jit_comp32.c
index ace5db3fbd17..fee334544d2f 100644
--- a/arch/mips/net/bpf_jit_comp32.c
+++ b/arch/mips/net/bpf_jit_comp32.c
@@ -446,6 +446,9 @@  static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
 		} else {
 			emit(ctx, multu, hi(dst), src);
 			emit(ctx, mflo, hi(dst));
+			/* Ensure multiplication is completed */
+			if (IS_ENABLED(CONFIG_CPU_R4000_WORKAROUNDS))
+				emit(ctx, mfhi, MIPS_R_ZERO);
 		}
 
 		/* hi(dst) = hi(dst) - lo(dst) */
@@ -504,6 +507,7 @@  static void emit_mul_r64(struct jit_context *ctx,
 	} else {
 		emit(ctx, multu, lo(dst), lo(src));
 		emit(ctx, mflo, lo(dst));
+		/* No need for workaround because we have this mfhi */
 		emit(ctx, mfhi, tmp);
 	}
 
diff --git a/arch/mips/net/bpf_jit_comp64.c b/arch/mips/net/bpf_jit_comp64.c
index 0e7c1bdcf914..5f5a93f997bc 100644
--- a/arch/mips/net/bpf_jit_comp64.c
+++ b/arch/mips/net/bpf_jit_comp64.c
@@ -228,6 +228,9 @@  static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op)
 		} else {
 			emit(ctx, dmultu, dst, src);
 			emit(ctx, mflo, dst);
+			/* Ensure multiplication is completed */
+			if (IS_ENABLED(CONFIG_CPU_R4000_WORKAROUNDS))
+				emit(ctx, mfhi, MIPS_R_ZERO);
 		}
 		break;
 	/* dst = dst / src */