[v2,4/5] bpf ppc32: add support for BPF_ATOMIC bitwise operations

Message ID 20220610155552.25892-5-hbathini@linux.ibm.com (mailing list archive)
State Not Applicable
Delegated to: BPF
Series: Atomics support for eBPF on powerpc

Checks

Context                       | Check   | Description
netdev/tree_selection         | success | Not a local patch
bpf/vmtest-bpf-next-PR        | success | PR summary
bpf/vmtest-bpf-next-VM_Test-1 | success | Logs for Kernel LATEST on ubuntu-latest with gcc
bpf/vmtest-bpf-next-VM_Test-2 | success | Logs for Kernel LATEST on ubuntu-latest with llvm-15
bpf/vmtest-bpf-next-VM_Test-3 | success | Logs for Kernel LATEST on z15 with gcc

Commit Message

Hari Bathini June 10, 2022, 3:55 p.m. UTC
Add ppc32 JIT support for the following BPF atomic operations:

atomic_and
atomic_or
atomic_xor
atomic_fetch_add
atomic_fetch_and
atomic_fetch_or
atomic_fetch_xor

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---

Changes in v2:
* Used an additional register (BPF_REG_AX)
    - to avoid clobbering src_reg.
    - to keep the lwarx reservation as intended.
    - to avoid the odd switch/goto construct.
* Zeroed out the upper 32 bits explicitly when required.

 arch/powerpc/net/bpf_jit_comp32.c | 53 ++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 12 deletions(-)
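
For context, here is a sketch (pseudo-assembly, not part of the patch) of the
sequence the JIT emits after this change for a fetch variant such as
atomic_fetch_and(). BPF_REG_AX (ax_reg) keeps the pre-update value so that
src_reg is not clobbered and nothing disturbs the lwarx/stwcx. reservation:

	retry:
		lwarx	r0, tmp_reg, dst_reg	# load-reserve *(dst + off)
		mr	ax_reg, r0		# save old value (BPF_FETCH only)
		and	r0, r0, src_reg		# apply the bitwise op
		stwcx.	r0, tmp_reg, dst_reg	# store-conditional
		bne-	retry			# reservation lost: retry from lwarx
		mr	src_reg, ax_reg		# BPF_FETCH: return old value
		li	src_reg_h, 0		# zero upper 32 bits unless verifier_zext

On a failed store-conditional the branch goes back to the lwarx (the saved
tmp_idx), so the whole read-modify-write is retried, not just the store.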

Comments

Christophe Leroy June 11, 2022, 5:14 p.m. UTC | #1
On 10/06/2022 at 17:55, Hari Bathini wrote:
> Adding instructions for ppc32 for
> 
> atomic_and
> atomic_or
> atomic_xor
> atomic_fetch_add
> atomic_fetch_and
> atomic_fetch_or
> atomic_fetch_xor
> 
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
> 
> Changes in v2:
> * Used an additional register (BPF_REG_AX)
>      - to avoid clobbering src_reg.
>      - to keep the lwarx reservation as intended.
>      - to avoid the odd switch/goto construct.

Might be a stupid question as I don't know the internals of BPF: are we
sure BPF_REG_AX cannot be the src reg or the dst reg?


Hari Bathini June 13, 2022, 7 p.m. UTC | #2
On 11/06/22 10:44 pm, Christophe Leroy wrote:
> 
> 
> On 10/06/2022 at 17:55, Hari Bathini wrote:
>> [...]
>>
>> Changes in v2:
>> * Used an additional register (BPF_REG_AX)
>>       - to avoid clobbering src_reg.
>>       - to keep the lwarx reservation as intended.
>>       - to avoid the odd switch/goto construct.
> 
> Might be a stupid question as I don't know the internals of BPF: are we
> sure BPF_REG_AX cannot be the src reg or the dst reg?
> 

AFAICS, BPF_REG_AX wouldn't be used as src_reg or dst_reg unless this
code is reused internally by arch-specific code to JIT some other
instruction(s) with BPF_REG_AX as either the src or dst reg.
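
For reference, a BPF program can only encode registers R0-R10; BPF_REG_AX sits
one past that range and is only introduced by kernel-internal rewrites such as
constant blinding. Roughly, from include/linux/filter.h (quoted from memory,
so verify against the tree):

	/* Kernel hidden auxiliary/helper register. */
	#define BPF_REG_AX		MAX_BPF_REG
	#define MAX_BPF_EXT_REG		(MAX_BPF_REG + 1)
	#define MAX_BPF_JIT_REG		MAX_BPF_EXT_REG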

Thanks
Hari

Patch

diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index e46ed1e8c6ca..28dc6a1a8f2f 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -294,6 +294,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		u32 dst_reg_h = dst_reg - 1;
 		u32 src_reg = bpf_to_ppc(insn[i].src_reg);
 		u32 src_reg_h = src_reg - 1;
+		u32 ax_reg = bpf_to_ppc(BPF_REG_AX);
 		u32 tmp_reg = bpf_to_ppc(TMP_REG);
 		u32 size = BPF_SIZE(code);
 		s16 off = insn[i].off;
@@ -798,25 +799,53 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		 * BPF_STX ATOMIC (atomic ops)
 		 */
 		case BPF_STX | BPF_ATOMIC | BPF_W:
-			if (imm != BPF_ADD) {
-				pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n",
-						   code, i);
-				return -ENOTSUPP;
-			}
-
-			/* *(u32 *)(dst + off) += src */
-
 			bpf_set_seen_register(ctx, tmp_reg);
+			bpf_set_seen_register(ctx, ax_reg);
+
 			/* Get offset into TMP_REG */
 			EMIT(PPC_RAW_LI(tmp_reg, off));
+			tmp_idx = ctx->idx * 4;
 			/* load value from memory into r0 */
 			EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
-			/* add value from src_reg into this */
-			EMIT(PPC_RAW_ADD(_R0, _R0, src_reg));
-			/* store result back */
+
+			/* Save old value in BPF_REG_AX */
+			if (imm & BPF_FETCH)
+				EMIT(PPC_RAW_MR(ax_reg, _R0));
+
+			switch (imm) {
+			case BPF_ADD:
+			case BPF_ADD | BPF_FETCH:
+				EMIT(PPC_RAW_ADD(_R0, _R0, src_reg));
+				break;
+			case BPF_AND:
+			case BPF_AND | BPF_FETCH:
+				EMIT(PPC_RAW_AND(_R0, _R0, src_reg));
+				break;
+			case BPF_OR:
+			case BPF_OR | BPF_FETCH:
+				EMIT(PPC_RAW_OR(_R0, _R0, src_reg));
+				break;
+			case BPF_XOR:
+			case BPF_XOR | BPF_FETCH:
+				EMIT(PPC_RAW_XOR(_R0, _R0, src_reg));
+				break;
+			default:
+				pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n",
+						   code, i);
+				return -EOPNOTSUPP;
+			}
+
+			/* store new value */
 			EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg));
 			/* we're done if this succeeded */
-			PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4);
+			PPC_BCC_SHORT(COND_NE, tmp_idx);
+
+			/* For the BPF_FETCH variant, get old data into src_reg */
+			if (imm & BPF_FETCH) {
+				EMIT(PPC_RAW_MR(src_reg, ax_reg));
+				if (!fp->aux->verifier_zext)
+					EMIT(PPC_RAW_LI(src_reg_h, 0));
+			}
 			break;
 
 		case BPF_STX | BPF_ATOMIC | BPF_DW: /* *(u64 *)(dst + off) += src */
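
A minimal sketch of an instruction that exercises the new path, using the
BPF_ATOMIC_OP() helper as defined for the kernel selftests in
tools/include/linux/filter.h (illustrative; check the helper there):

	/* w2 = atomic_fetch_xor((u32 *)(r1 + 0), w2) */
	BPF_ATOMIC_OP(BPF_W, BPF_XOR | BPF_FETCH, BPF_REG_1, BPF_REG_2, 0),

With BPF_FETCH set, the JIT above copies the pre-update value from BPF_REG_AX
into src_reg and, unless the verifier guarantees zero-extension
(verifier_zext), clears the upper 32 bits of the register pair.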