[bpf-next,v1,3/8] bpf: Introduce load-acquire and store-release instructions

Message ID e52e4ab7bea5b29475d70e164c4b07992afd6033.1737763916.git.yepeilin@google.com (mailing list archive)
State New
Series Introduce load-acquire and store-release BPF instructions

Commit Message

Peilin Ye Jan. 25, 2025, 2:18 a.m. UTC
Introduce BPF instructions with load-acquire and store-release
semantics, as discussed in [1].  The following new flags are defined:

  BPF_ATOMIC_LOAD         0x10
  BPF_ATOMIC_STORE        0x20
  BPF_ATOMIC_TYPE(imm)    ((imm) & 0xf0)

  BPF_RELAXED        0x0
  BPF_ACQUIRE        0x1
  BPF_RELEASE        0x2
  BPF_ACQ_REL        0x3
  BPF_SEQ_CST        0x4

  BPF_LOAD_ACQ       (BPF_ATOMIC_LOAD | BPF_ACQUIRE)
  BPF_STORE_REL      (BPF_ATOMIC_STORE | BPF_RELEASE)

A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
field set to BPF_LOAD_ACQ (0x11).

Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with
the 'imm' field set to BPF_STORE_REL (0x22).
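
A quick decoding check using the definitions above: masking the 'imm'
field with BPF_ATOMIC_TYPE() recovers the operation class, while the
low nibble carries the memory order:

  BPF_ATOMIC_TYPE(BPF_LOAD_ACQ)  == BPF_ATOMIC_LOAD   /* 0x11 & 0xf0 == 0x10 */
  BPF_ATOMIC_TYPE(BPF_STORE_REL) == BPF_ATOMIC_STORE  /* 0x22 & 0xf0 == 0x20 */
  (BPF_LOAD_ACQ  & 0x0f) == BPF_ACQUIRE               /* 0x01 */
  (BPF_STORE_REL & 0x0f) == BPF_RELEASE               /* 0x02 */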

Unlike existing atomic operations that only support BPF_W (32-bit) and
BPF_DW (64-bit) size modifiers, load-acquires and store-releases also
support BPF_B (8-bit) and BPF_H (16-bit).  An 8- or 16-bit load-acquire
zero-extends the value before writing it to a 32-bit register, just like
the ARM64 LDARH instruction and friends.
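
In C terms, a 16-bit load-acquire behaves like this sketch (an
illustration only, not code from the patch; cf. the LOAD_ACQUIRE()
macro in the interpreter changes below):

  u16 v = smp_load_acquire((u16 *)src);  /* 16-bit acquire load */
  dst = (u64)v;                          /* zero-extended; upper bits clear */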

As an example, consider the following 64-bit load-acquire BPF
instruction:

  db 10 00 00 11 00 00 00  r0 = load_acquire((u64 *)(r1 + 0x0))

  opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
  imm (0x00000011): BPF_LOAD_ACQ

Similarly, a 16-bit BPF store-release:

  cb 21 00 00 22 00 00 00  store_release((u16 *)(r1 + 0x0), w2)

  opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
  imm (0x00000022): BPF_STORE_REL
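
Both examples can be constructed with the existing BPF_ATOMIC_OP()
macro, whose documentation this patch extends in include/linux/filter.h:

  /* db 10 00 00 11 00 00 00: r0 = load_acquire((u64 *)(r1 + 0x0)) */
  BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0)

  /* cb 21 00 00 22 00 00 00: store_release((u16 *)(r1 + 0x0), w2) */
  BPF_ATOMIC_OP(BPF_H, BPF_STORE_REL, BPF_REG_1, BPF_REG_2, 0)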

[1] https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@google.com/

Signed-off-by: Peilin Ye <yepeilin@google.com>
---
 include/linux/filter.h         |  2 +
 include/uapi/linux/bpf.h       | 13 +++++
 kernel/bpf/core.c              | 41 +++++++++++++-
 kernel/bpf/disasm.c            | 12 +++++
 kernel/bpf/verifier.c          | 99 ++++++++++++++++++++++++++++++++--
 tools/include/uapi/linux/bpf.h | 13 +++++
 6 files changed, 175 insertions(+), 5 deletions(-)

Comments

Eduard Zingerman Jan. 29, 2025, 12:19 a.m. UTC | #1
On Sat, 2025-01-25 at 02:18 +0000, Peilin Ye wrote:
> Introduce BPF instructions with load-acquire and store-release
> semantics, as discussed in [1].  The following new flags are defined:
> 
>   BPF_ATOMIC_LOAD         0x10
>   BPF_ATOMIC_STORE        0x20
>   BPF_ATOMIC_TYPE(imm)    ((imm) & 0xf0)
> 
>   BPF_RELAXED        0x0
>   BPF_ACQUIRE        0x1
>   BPF_RELEASE        0x2
>   BPF_ACQ_REL        0x3
>   BPF_SEQ_CST        0x4
> 
>   BPF_LOAD_ACQ       (BPF_ATOMIC_LOAD | BPF_ACQUIRE)
>   BPF_STORE_REL      (BPF_ATOMIC_STORE | BPF_RELEASE)
> 
> A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
> field set to BPF_LOAD_ACQ (0x11).
> 
> Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with
> the 'imm' field set to BPF_STORE_REL (0x22).
> 
> Unlike existing atomic operations that only support BPF_W (32-bit) and
> BPF_DW (64-bit) size modifiers, load-acquires and store-releases also
> support BPF_B (8-bit) and BPF_H (16-bit).  An 8- or 16-bit load-acquire
> zero-extends the value before writing it to a 32-bit register, just like
> the ARM64 LDARH instruction and friends.
> 
> As an example, consider the following 64-bit load-acquire BPF
> instruction:
> 
>   db 10 00 00 11 00 00 00  r0 = load_acquire((u64 *)(r1 + 0x0))
> 
>   opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
>   imm (0x00000011): BPF_LOAD_ACQ
> 
> Similarly, a 16-bit BPF store-release:
> 
>   cb 21 00 00 22 00 00 00  store_release((u16 *)(r1 + 0x0), w2)
> 
>   opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
>   imm (0x00000022): BPF_STORE_REL
> 
> [1] https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@google.com/
> 
> Signed-off-by: Peilin Ye <yepeilin@google.com>
> ---

I think bpf_jit_supports_insn() in arch/{x86,s390}/net/bpf_jit_comp.c
need an update, as both would accept BPF_LOAD_ACQ/BPF_STORE_REL at the
moment.
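
E.g., a minimal guard along these lines (just a sketch; the exact
shape and placement is up to each arch JIT):

  bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
  {
  	if (BPF_CLASS(insn->code) == BPF_STX &&
  	    BPF_MODE(insn->code) == BPF_ATOMIC &&
  	    (insn->imm == BPF_LOAD_ACQ || insn->imm == BPF_STORE_REL))
  		return false;	/* load-acquire/store-release not JITed yet */

  	/* ... existing per-arch checks ... */
  	return true;
  }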

Acked-by: Eduard Zingerman <eddyz87@gmail.com>

[...]

> +static int check_atomic_load(struct bpf_verifier_env *env, int insn_idx,
> +			     struct bpf_insn *insn)
> +{
> +	struct bpf_reg_state *regs = cur_regs(env);
> +	int err;
> +
> +	err = check_reg_arg(env, insn->src_reg, SRC_OP);
> +	if (err)
> +		return err;
> +
> +	err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
> +	if (err)
> +		return err;
> +
> +	if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
> +		verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
> +			insn->src_reg,
> +			reg_type_str(env, reg_state(env, insn->src_reg)->type));
> +		return -EACCES;
> +	}
> +
> +	if (is_arena_reg(env, insn->src_reg)) {
> +		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
> +		if (err)
> +			return err;

Nit: this and the next function look very similar to the processing of
     generic loads and stores in do_check(). Maybe extract that code
     as an auxiliary function and call it in both places?
     The only major difference is the is_arena_reg() check guarding
     save_aux_ptr_type(), but I think it is ok to do save_aux_ptr_type()
     unconditionally. Fwiw, the code would be a bit simpler; I just spent
     half an hour convincing myself that such conditional handling
     is not an error. Wdyt?

> +	}
> +
> +	/* Check whether we can read the memory. */
> +	err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
> +			       BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
> +			       true, false);
> +	if (err)
> +		return err;
> +
> +	err = reg_bounds_sanity_check(env, &regs[insn->dst_reg], "atomic_load");
> +	if (err)
> +		return err;
> +	return 0;
> +}
> +
> +static int check_atomic_store(struct bpf_verifier_env *env, int insn_idx,
> +			      struct bpf_insn *insn)
> +{
> +	int err;
> +
> +	err = check_reg_arg(env, insn->src_reg, SRC_OP);
> +	if (err)
> +		return err;
> +
> +	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
> +	if (err)
> +		return err;
> +
> +	if (is_pointer_value(env, insn->src_reg)) {
> +		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
> +		return -EACCES;
> +	}
> +
> +	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
> +		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
> +			insn->dst_reg,
> +			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
> +		return -EACCES;
> +	}
> +
> +	if (is_arena_reg(env, insn->dst_reg)) {
> +		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
> +		if (err)
> +			return err;
> +	}
> +
> +	/* Check whether we can write into the memory. */
> +	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> +			       BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
> +			       true, false);
> +	if (err)
> +		return err;
> +	return 0;
> +}
> +

[...]
Eduard Zingerman Jan. 29, 2025, 1:30 a.m. UTC | #2
On Sat, 2025-01-25 at 02:18 +0000, Peilin Ye wrote:

[...]

> +static int check_atomic_store(struct bpf_verifier_env *env, int insn_idx,
> +			      struct bpf_insn *insn)
> +{
> +	int err;
> +
> +	err = check_reg_arg(env, insn->src_reg, SRC_OP);
> +	if (err)
> +		return err;
> +
> +	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
> +	if (err)
> +		return err;
> +
> +	if (is_pointer_value(env, insn->src_reg)) {
> +		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
> +		return -EACCES;
> +	}

Nit: this check is done by check_mem_access(), albeit only for
     PTR_TO_MEM; I think it's better to be consistent with
     what happens for regular stores and avoid this check here.

> +
> +	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
> +		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
> +			insn->dst_reg,
> +			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
> +		return -EACCES;
> +	}
> +
> +	if (is_arena_reg(env, insn->dst_reg)) {
> +		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
> +		if (err)
> +			return err;
> +	}
> +
> +	/* Check whether we can write into the memory. */
> +	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> +			       BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
> +			       true, false);
> +	if (err)
> +		return err;
> +	return 0;
> +}

[...]
Peilin Ye Jan. 29, 2025, 10:04 p.m. UTC | #3
On Tue, Jan 28, 2025 at 04:19:25PM -0800, Eduard Zingerman wrote:
> On Sat, 2025-01-25 at 02:18 +0000, Peilin Ye wrote:
> > Signed-off-by: Peilin Ye <yepeilin@google.com>
> > ---
> 
> I think bpf_jit_supports_insn() in arch/{x86,s390}/net/bpf_jit_comp.c
> need an update, as both would accept BPF_LOAD_ACQ/BPF_STORE_REL at the
> moment.

Got it - I will move is_atomic_load_store() into <linux/bpf.h> for that.
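
A plausible shape for that helper, based on the new flags (a sketch,
not necessarily what v2 will look like):

  static inline bool is_atomic_load_store(const struct bpf_insn *insn)
  {
  	const s32 type = BPF_ATOMIC_TYPE(insn->imm);

  	return BPF_CLASS(insn->code) == BPF_STX &&
  	       BPF_MODE(insn->code) == BPF_ATOMIC &&
  	       (type == BPF_ATOMIC_LOAD || type == BPF_ATOMIC_STORE);
  }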

> Acked-by: Eduard Zingerman <eddyz87@gmail.com>

Thanks!

> > +static int check_atomic_load(struct bpf_verifier_env *env, int insn_idx,
> > +			     struct bpf_insn *insn)
> > +{
> > +	struct bpf_reg_state *regs = cur_regs(env);
> > +	int err;
> > +
> > +	err = check_reg_arg(env, insn->src_reg, SRC_OP);
> > +	if (err)
> > +		return err;
> > +
> > +	err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
> > +	if (err)
> > +		return err;
> > +
> > +	if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
> > +		verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
> > +			insn->src_reg,
> > +			reg_type_str(env, reg_state(env, insn->src_reg)->type));
> > +		return -EACCES;
> > +	}
> > +
> > +	if (is_arena_reg(env, insn->src_reg)) {
> > +		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
> > +		if (err)
> > +			return err;
> 
> Nit: this and the next function look very similar to the processing of
>      generic loads and stores in do_check(). Maybe extract that code
>      as an auxiliary function and call it in both places?

Sure, I agree that they look a bit repetitive.

>      The only major difference is the is_arena_reg() check guarding
>      save_aux_ptr_type(), but I think it is ok to do save_aux_ptr_type()
>      unconditionally. Fwiw, the code would be a bit simpler; I just spent
>      half an hour convincing myself that such conditional handling
>      is not an error. Wdyt?

:-O

Thanks a lot for that; would you mind sharing a bit more on how you
reasoned about it (i.e., why it is OK to call save_aux_ptr_type()
unconditionally)?

> > +	}
> > +
> > +	/* Check whether we can read the memory. */
> > +	err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
> > +			       BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
> > +			       true, false);
> > +	if (err)
> > +		return err;
> > +
> > +	err = reg_bounds_sanity_check(env, &regs[insn->dst_reg], "atomic_load");
> > +	if (err)
> > +		return err;
> > +	return 0;
> > +}
> > +
> > +static int check_atomic_store(struct bpf_verifier_env *env, int insn_idx,
> > +			      struct bpf_insn *insn)

Thanks,
Peilin Ye
Peilin Ye Jan. 29, 2025, 10:17 p.m. UTC | #4
On Tue, Jan 28, 2025 at 05:30:19PM -0800, Eduard Zingerman wrote:
> On Sat, 2025-01-25 at 02:18 +0000, Peilin Ye wrote:
> > +static int check_atomic_store(struct bpf_verifier_env *env, int insn_idx,
> > +			      struct bpf_insn *insn)
> > +{
> > +	int err;
> > +
> > +	err = check_reg_arg(env, insn->src_reg, SRC_OP);
> > +	if (err)
> > +		return err;
> > +
> > +	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
> > +	if (err)
> > +		return err;
> > +
> > +	if (is_pointer_value(env, insn->src_reg)) {
> > +		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
> > +		return -EACCES;
> > +	}
> 
> Nit: this check is done by check_mem_access(), albeit only for
>      PTR_TO_MEM; I think it's better to be consistent with
>      what happens for regular stores and avoid this check here.

Got it.  Unprivileged programs will be able to store-release pointers to
the stack, then.  I'll update selftests accordingly.
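
E.g., something like this should then be accepted for unprivileged
programs (hypothetical fragment; register and offset choices are for
illustration only):

  BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),   /* r1 = r10, a pointer */
  /* store_release((u64 *)(r10 - 8), r1) */
  BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8),
  BPF_MOV64_IMM(BPF_REG_0, 0),
  BPF_EXIT_INSN(),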

Thanks,
Peilin Ye
Eduard Zingerman Jan. 29, 2025, 10:42 p.m. UTC | #5
On Wed, 2025-01-29 at 22:04 +0000, Peilin Ye wrote:

[...]

> > > +static int check_atomic_load(struct bpf_verifier_env *env, int insn_idx,
> > > +			     struct bpf_insn *insn)
> > > +{
> > > +	struct bpf_reg_state *regs = cur_regs(env);
> > > +	int err;
> > > +
> > > +	err = check_reg_arg(env, insn->src_reg, SRC_OP);
> > > +	if (err)
> > > +		return err;
> > > +
> > > +	err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
> > > +	if (err)
> > > +		return err;
> > > +
> > > +	if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
> > > +		verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
> > > +			insn->src_reg,
> > > +			reg_type_str(env, reg_state(env, insn->src_reg)->type));
> > > +		return -EACCES;
> > > +	}
> > > +
> > > +	if (is_arena_reg(env, insn->src_reg)) {
> > > +		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
> > > +		if (err)
> > > +			return err;
> > 
> > Nit: this and the next function look very similar to the processing of
> >      generic loads and stores in do_check(). Maybe extract that code
> >      as an auxiliary function and call it in both places?
> 
> Sure, I agree that they look a bit repetitive.
> 
> >      The only major difference is the is_arena_reg() check guarding
> >      save_aux_ptr_type(), but I think it is ok to do save_aux_ptr_type()
> >      unconditionally. Fwiw, the code would be a bit simpler; I just spent
> >      half an hour convincing myself that such conditional handling
> >      is not an error. Wdyt?
> 
> :-O
> 
> Thanks a lot for that; would you mind sharing a bit more on how you
> reasoned about it (i.e., why it is OK to call save_aux_ptr_type()
> unconditionally)?

Well, save_aux_ptr_type() does two things:
- if there is no env->insn_aux_data[env->insn_idx].ptr_type associated
  with the instruction it saves one;
- if there is .ptr_type, it checks if a new one is compatible and
  errors out if it's not.

The .ptr_type is used in convert_ctx_accesses() to rewrite an access
instruction (STX/LDX, atomic or not) in a way specific to the pointer
type.

So, doing save_aux_ptr_type() conditionally is already sketchy,
as there is a risk of missing a case where some instruction is used
in a context where the pointer type requires different rewrites.

convert_ctx_accesses() rewrites instructions for the following pointer
types:
- PTR_TO_CTX
- PTR_TO_SOCKET
- PTR_TO_SOCK_COMMON
- PTR_TO_TCP_SOCK
- PTR_TO_XDP_SOCK
- PTR_TO_BTF_ID
- PTR_TO_ARENA

atomic_ptr_type_ok() allows the following pointer types:
- CONST_PTR_TO_MAP
- PTR_TO_MAP_VALUE
- PTR_TO_MAP_KEY
- PTR_TO_STACK
- PTR_TO_BTF_ID
- PTR_TO_MEM
- PTR_TO_ARENA
- PTR_TO_BUF
- PTR_TO_FUNC
- CONST_PTR_TO_DYNPTR

One has to check rewrites applied by convert_ctx_accesses() to atomic
instructions to reason about correctness of the conditional
save_aux_ptr_type() call.

If the is_arena_reg() guard before save_aux_ptr_type() is removed, we
risk rejecting programs that do an atomic load/store where the same
instruction is used to access a pointer that can be any of the above
types. I suspect this is not a problem, as do_check() processing for
BPF_STX/BPF_LDX already calls save_aux_ptr_type() unconditionally.
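
In pseudocode, the two behaviors above (a rough sketch, not the exact
verifier code; compatible() stands in for the actual compatibility
checks):

  aux = &env->insn_aux_data[env->insn_idx];
  if (aux->ptr_type == NOT_INIT)
  	aux->ptr_type = type;			/* first visit: remember it */
  else if (!compatible(aux->ptr_type, type))
  	return -EINVAL;				/* one insn, conflicting rewrites */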

[...]
Alexei Starovoitov Jan. 30, 2025, 12:41 a.m. UTC | #6
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index da729cbbaeb9..ab082ab9d535 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -1663,14 +1663,17 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
>         INSN_3(JMP, JSET, K),                   \
>         INSN_2(JMP, JA),                        \
>         INSN_2(JMP32, JA),                      \
> +       /* Atomic operations. */                \
> +       INSN_3(STX, ATOMIC, B),                 \
> +       INSN_3(STX, ATOMIC, H),                 \
> +       INSN_3(STX, ATOMIC, W),                 \
> +       INSN_3(STX, ATOMIC, DW),                \
>         /* Store instructions. */               \
>         /*   Register based. */                 \
>         INSN_3(STX, MEM,  B),                   \
>         INSN_3(STX, MEM,  H),                   \
>         INSN_3(STX, MEM,  W),                   \
>         INSN_3(STX, MEM,  DW),                  \
> -       INSN_3(STX, ATOMIC, W),                 \
> -       INSN_3(STX, ATOMIC, DW),                \
>         /*   Immediate based. */                \
>         INSN_3(ST, MEM, B),                     \
>         INSN_3(ST, MEM, H),                     \
> @@ -2169,6 +2172,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
>
>         STX_ATOMIC_DW:
>         STX_ATOMIC_W:
> +       STX_ATOMIC_H:
> +       STX_ATOMIC_B:
>                 switch (IMM) {
>                 ATOMIC_ALU_OP(BPF_ADD, add)
>                 ATOMIC_ALU_OP(BPF_AND, and)

STX_ATOMIC_[BH] looks wrong.
It will do atomic64_*() ops at the wrong size.
BPF_INSN_MAP() feeds bpf_opcode_in_insntable(),
so the verifier will allow such new insns.
Peilin Ye Jan. 30, 2025, 3:10 a.m. UTC | #7
On Wed, Jan 29, 2025 at 02:42:41PM -0800, Eduard Zingerman wrote:
> On Wed, 2025-01-29 at 22:04 +0000, Peilin Ye wrote:
> > Thanks a lot for that; would you mind sharing a bit more on how you
> > reasoned about it (i.e., why it is OK to call save_aux_ptr_type()
> > unconditionally)?
> 
> Well, save_aux_ptr_type() does two things:
> - if there is no env->insn_aux_data[env->insn_idx].ptr_type associated
>   with the instruction it saves one;
> - if there is .ptr_type, it checks if a new one is compatible and
>   errors out if it's not.
> 
> The .ptr_type is used in convert_ctx_accesses() to rewrite an access
> instruction (STX/LDX, atomic or not) in a way specific to the pointer
> type.
> 
> So, doing save_aux_ptr_type() conditionally is already sketchy,
> as there is a risk of missing a case where some instruction is used
> in a context where the pointer type requires different rewrites.
> 
> convert_ctx_accesses() rewrites instructions for the following pointer
> types:
> - PTR_TO_CTX
> - PTR_TO_SOCKET
> - PTR_TO_SOCK_COMMON
> - PTR_TO_TCP_SOCK
> - PTR_TO_XDP_SOCK
> - PTR_TO_BTF_ID
> - PTR_TO_ARENA
> 
> atomic_ptr_type_ok() allows the following pointer types:
> - CONST_PTR_TO_MAP
> - PTR_TO_MAP_VALUE
> - PTR_TO_MAP_KEY
> - PTR_TO_STACK
> - PTR_TO_BTF_ID
> - PTR_TO_MEM
> - PTR_TO_ARENA
> - PTR_TO_BUF
> - PTR_TO_FUNC
> - CONST_PTR_TO_DYNPTR
> 
> One has to check rewrites applied by convert_ctx_accesses() to atomic
> instructions to reason about correctness of the conditional
> save_aux_ptr_type() call.
>
> If the is_arena_reg() guard before save_aux_ptr_type() is removed, we
> risk rejecting programs that do an atomic load/store where the same
> instruction is used to access a pointer that can be any of the above
> types. I suspect this is not a problem, as do_check() processing for
> BPF_STX/BPF_LDX already calls save_aux_ptr_type() unconditionally.

I see, thanks for the explanation!

Peilin Ye
Peilin Ye Jan. 30, 2025, 3:38 a.m. UTC | #8
Hi Alexei,

On Wed, Jan 29, 2025 at 04:41:31PM -0800, Alexei Starovoitov wrote:
> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index da729cbbaeb9..ab082ab9d535 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -1663,14 +1663,17 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
> >         INSN_3(JMP, JSET, K),                   \
> >         INSN_2(JMP, JA),                        \
> >         INSN_2(JMP32, JA),                      \
> > +       /* Atomic operations. */                \
> > +       INSN_3(STX, ATOMIC, B),                 \
> > +       INSN_3(STX, ATOMIC, H),                 \
> > +       INSN_3(STX, ATOMIC, W),                 \
> > +       INSN_3(STX, ATOMIC, DW),                \
> >         /* Store instructions. */               \
> >         /*   Register based. */                 \
> >         INSN_3(STX, MEM,  B),                   \
> >         INSN_3(STX, MEM,  H),                   \
> >         INSN_3(STX, MEM,  W),                   \
> >         INSN_3(STX, MEM,  DW),                  \
> > -       INSN_3(STX, ATOMIC, W),                 \
> > -       INSN_3(STX, ATOMIC, DW),                \
> >         /*   Immediate based. */                \
> >         INSN_3(ST, MEM, B),                     \
> >         INSN_3(ST, MEM, H),                     \
> > @@ -2169,6 +2172,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
> >
> >         STX_ATOMIC_DW:
> >         STX_ATOMIC_W:
> > +       STX_ATOMIC_H:
> > +       STX_ATOMIC_B:
> >                 switch (IMM) {
> >                 ATOMIC_ALU_OP(BPF_ADD, add)
> >                 ATOMIC_ALU_OP(BPF_AND, and)
> 
> STX_ATOMIC_[BH] looks wrong.
> It will do atomic64_*() ops at the wrong size.
> BPF_INSN_MAP() feeds bpf_opcode_in_insntable(),
> so the verifier will allow such new insns.

We still have this check in check_atomic():

  if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
          verbose(env, "invalid atomic operand size\n");
          return -EINVAL;
  }

(moved to check_atomic_rmw() in PATCH 2/8)

Looks like it cannot be triggered before this patch, since
STX_ATOMIC_[BH] would've already been rejected by that
bpf_opcode_in_insntable() check before reaching check_atomic().

I agree that the interpreter code handling RMW atomics might now look a
bit confusing though.  In v2 I'll refactor that part and/or add comments
to make it clearer in the code that:

  * only W and DW are allowed for atomic RMW
  * all of B, H, W and DW are allowed for atomic load/store
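
Something along these lines, perhaps (hypothetical sketch, not the
actual v2 code):

  static bool bpf_atomic_size_ok(const struct bpf_insn *insn)
  {
  	/* Load-acquire/store-release: B, H, W and DW all allowed. */
  	if (insn->imm == BPF_LOAD_ACQ || insn->imm == BPF_STORE_REL)
  		return true;

  	/* RMW atomics: only W and DW. */
  	return BPF_SIZE(insn->code) == BPF_W ||
  	       BPF_SIZE(insn->code) == BPF_DW;
  }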

Thanks,
Peilin Ye

Patch

diff --git a/include/linux/filter.h b/include/linux/filter.h
index a3ea46281595..e36812a5b01f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -364,6 +364,8 @@  static inline bool insn_is_cast_user(const struct bpf_insn *insn)
  *   BPF_XOR | BPF_FETCH      src_reg = atomic_fetch_xor(dst_reg + off16, src_reg);
  *   BPF_XCHG                 src_reg = atomic_xchg(dst_reg + off16, src_reg)
  *   BPF_CMPXCHG              r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
+ *   BPF_LOAD_ACQ             dst_reg = smp_load_acquire(src_reg + off16)
+ *   BPF_STORE_REL            smp_store_release(dst_reg + off16, src_reg)
  */
 
 #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF)			\
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2acf9b336371..4a20a125eb46 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -51,6 +51,19 @@ 
 #define BPF_XCHG	(0xe0 | BPF_FETCH)	/* atomic exchange */
 #define BPF_CMPXCHG	(0xf0 | BPF_FETCH)	/* atomic compare-and-write */
 
+#define BPF_ATOMIC_LOAD		0x10
+#define BPF_ATOMIC_STORE	0x20
+#define BPF_ATOMIC_TYPE(imm)	((imm) & 0xf0)
+
+#define BPF_RELAXED	0x00
+#define BPF_ACQUIRE	0x01
+#define BPF_RELEASE	0x02
+#define BPF_ACQ_REL	0x03
+#define BPF_SEQ_CST	0x04
+
+#define BPF_LOAD_ACQ	(BPF_ATOMIC_LOAD | BPF_ACQUIRE)		/* load-acquire */
+#define BPF_STORE_REL	(BPF_ATOMIC_STORE | BPF_RELEASE)	/* store-release */
+
 enum bpf_cond_pseudo_jmp {
 	BPF_MAY_GOTO = 0,
 };
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index da729cbbaeb9..ab082ab9d535 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1663,14 +1663,17 @@  EXPORT_SYMBOL_GPL(__bpf_call_base);
 	INSN_3(JMP, JSET, K),			\
 	INSN_2(JMP, JA),			\
 	INSN_2(JMP32, JA),			\
+	/* Atomic operations. */		\
+	INSN_3(STX, ATOMIC, B),			\
+	INSN_3(STX, ATOMIC, H),			\
+	INSN_3(STX, ATOMIC, W),			\
+	INSN_3(STX, ATOMIC, DW),		\
 	/* Store instructions. */		\
 	/*   Register based. */			\
 	INSN_3(STX, MEM,  B),			\
 	INSN_3(STX, MEM,  H),			\
 	INSN_3(STX, MEM,  W),			\
 	INSN_3(STX, MEM,  DW),			\
-	INSN_3(STX, ATOMIC, W),			\
-	INSN_3(STX, ATOMIC, DW),		\
 	/*   Immediate based. */		\
 	INSN_3(ST, MEM, B),			\
 	INSN_3(ST, MEM, H),			\
@@ -2169,6 +2172,8 @@  static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
 
 	STX_ATOMIC_DW:
 	STX_ATOMIC_W:
+	STX_ATOMIC_H:
+	STX_ATOMIC_B:
 		switch (IMM) {
 		ATOMIC_ALU_OP(BPF_ADD, add)
 		ATOMIC_ALU_OP(BPF_AND, and)
@@ -2196,6 +2201,38 @@  static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
 					(atomic64_t *)(unsigned long) (DST + insn->off),
 					(u64) BPF_R0, (u64) SRC);
 			break;
+		case BPF_LOAD_ACQ:
+			switch (BPF_SIZE(insn->code)) {
+#define LOAD_ACQUIRE(SIZEOP, SIZE)				\
+			case BPF_##SIZEOP:			\
+				DST = (SIZE)smp_load_acquire(	\
+					(SIZE *)(unsigned long)(SRC + insn->off));	\
+				break;
+			LOAD_ACQUIRE(B,   u8)
+			LOAD_ACQUIRE(H,  u16)
+			LOAD_ACQUIRE(W,  u32)
+			LOAD_ACQUIRE(DW, u64)
+#undef LOAD_ACQUIRE
+			default:
+				goto default_label;
+			}
+			break;
+		case BPF_STORE_REL:
+			switch (BPF_SIZE(insn->code)) {
+#define STORE_RELEASE(SIZEOP, SIZE)			\
+			case BPF_##SIZEOP:		\
+				smp_store_release(	\
+					(SIZE *)(unsigned long)(DST + insn->off), (SIZE)SRC);	\
+				break;
+			STORE_RELEASE(B,   u8)
+			STORE_RELEASE(H,  u16)
+			STORE_RELEASE(W,  u32)
+			STORE_RELEASE(DW, u64)
+#undef STORE_RELEASE
+			default:
+				goto default_label;
+			}
+			break;
 
 		default:
 			goto default_label;
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 309c4aa1b026..974d172d6735 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -267,6 +267,18 @@  void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 				BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
 				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 				insn->dst_reg, insn->off, insn->src_reg);
+		} else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+			   insn->imm == BPF_LOAD_ACQ) {
+			verbose(cbs->private_data, "(%02x) r%d = load_acquire((%s *)(r%d %+d))\n",
+				insn->code, insn->dst_reg,
+				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+				insn->src_reg, insn->off);
+		} else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+			   insn->imm == BPF_STORE_REL) {
+			verbose(cbs->private_data, "(%02x) store_release((%s *)(r%d %+d), r%d)\n",
+				insn->code,
+				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+				insn->dst_reg, insn->off, insn->src_reg);
 		} else {
 			verbose(cbs->private_data, "BUG_%02x\n", insn->code);
 		}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2b3caa7549af..e44abe22aac9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -579,6 +579,13 @@  static bool is_cmpxchg_insn(const struct bpf_insn *insn)
 	       insn->imm == BPF_CMPXCHG;
 }
 
+static bool is_atomic_load_insn(const struct bpf_insn *insn)
+{
+	return BPF_CLASS(insn->code) == BPF_STX &&
+	       BPF_MODE(insn->code) == BPF_ATOMIC &&
+	       BPF_ATOMIC_TYPE(insn->imm) == BPF_ATOMIC_LOAD;
+}
+
 static int __get_spi(s32 off)
 {
 	return (-off - 1) / BPF_REG_SIZE;
@@ -3481,7 +3488,7 @@  static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	}
 
 	if (class == BPF_STX) {
-		/* BPF_STX (including atomic variants) has multiple source
+		/* BPF_STX (including atomic variants) has one or more source
 		 * operands, one of which is a ptr. Check whether the caller is
 		 * asking about it.
 		 */
@@ -4095,7 +4102,7 @@  static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
 			   * dreg still needs precision before this insn
 			   */
 		}
-	} else if (class == BPF_LDX) {
+	} else if (class == BPF_LDX || is_atomic_load_insn(insn)) {
 		if (!bt_is_reg_set(bt, dreg))
 			return 0;
 		bt_clear_reg(bt, dreg);
@@ -7625,6 +7632,86 @@  static int check_atomic_rmw(struct bpf_verifier_env *env, int insn_idx,
 	return 0;
 }
 
+static int check_atomic_load(struct bpf_verifier_env *env, int insn_idx,
+			     struct bpf_insn *insn)
+{
+	struct bpf_reg_state *regs = cur_regs(env);
+	int err;
+
+	err = check_reg_arg(env, insn->src_reg, SRC_OP);
+	if (err)
+		return err;
+
+	err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
+	if (err)
+		return err;
+
+	if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
+		verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
+			insn->src_reg,
+			reg_type_str(env, reg_state(env, insn->src_reg)->type));
+		return -EACCES;
+	}
+
+	if (is_arena_reg(env, insn->src_reg)) {
+		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
+		if (err)
+			return err;
+	}
+
+	/* Check whether we can read the memory. */
+	err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
+			       BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
+			       true, false);
+	if (err)
+		return err;
+
+	err = reg_bounds_sanity_check(env, &regs[insn->dst_reg], "atomic_load");
+	if (err)
+		return err;
+	return 0;
+}
+
+static int check_atomic_store(struct bpf_verifier_env *env, int insn_idx,
+			      struct bpf_insn *insn)
+{
+	int err;
+
+	err = check_reg_arg(env, insn->src_reg, SRC_OP);
+	if (err)
+		return err;
+
+	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
+	if (err)
+		return err;
+
+	if (is_pointer_value(env, insn->src_reg)) {
+		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
+		return -EACCES;
+	}
+
+	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
+		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
+			insn->dst_reg,
+			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
+		return -EACCES;
+	}
+
+	if (is_arena_reg(env, insn->dst_reg)) {
+		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
+		if (err)
+			return err;
+	}
+
+	/* Check whether we can write into the memory. */
+	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+			       BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
+			       true, false);
+	if (err)
+		return err;
+	return 0;
+}
+
 static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
 {
 	switch (insn->imm) {
@@ -7639,6 +7726,10 @@  static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
 	case BPF_XCHG:
 	case BPF_CMPXCHG:
 		return check_atomic_rmw(env, insn_idx, insn);
+	case BPF_LOAD_ACQ:
+		return check_atomic_load(env, insn_idx, insn);
+	case BPF_STORE_REL:
+		return check_atomic_store(env, insn_idx, insn);
 	default:
 		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
 		return -EINVAL;
@@ -20420,7 +20511,9 @@  static int convert_ctx_accesses(struct bpf_verifier_env *env)
 			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
 			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
 			type = BPF_WRITE;
-		} else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
+		} else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
+			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
+			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
 			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
 			   env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
 			insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2acf9b336371..4a20a125eb46 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -51,6 +51,19 @@ 
 #define BPF_XCHG	(0xe0 | BPF_FETCH)	/* atomic exchange */
 #define BPF_CMPXCHG	(0xf0 | BPF_FETCH)	/* atomic compare-and-write */
 
+#define BPF_ATOMIC_LOAD		0x10
+#define BPF_ATOMIC_STORE	0x20
+#define BPF_ATOMIC_TYPE(imm)	((imm) & 0xf0)
+
+#define BPF_RELAXED	0x00
+#define BPF_ACQUIRE	0x01
+#define BPF_RELEASE	0x02
+#define BPF_ACQ_REL	0x03
+#define BPF_SEQ_CST	0x04
+
+#define BPF_LOAD_ACQ	(BPF_ATOMIC_LOAD | BPF_ACQUIRE)		/* load-acquire */
+#define BPF_STORE_REL	(BPF_ATOMIC_STORE | BPF_RELEASE)	/* store-release */
+
 enum bpf_cond_pseudo_jmp {
 	BPF_MAY_GOTO = 0,
 };