[RFCv2,bpf-next,10/18] bpf: Verifier tracking of rbtree_spin_lock held

Message ID 20220830172759.4069786-11-davemarchevsky@fb.com (mailing list archive)
State RFC
Delegated to: BPF
Series bpf: Introduce rbtree map

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail merge-conflict
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/apply fail Patch does not apply to bpf-next

Commit Message

Dave Marchevsky Aug. 30, 2022, 5:27 p.m. UTC
This patch teaches the verifier that rbtree_{lock,unlock} interact with
its lock tracking (active_spin_lock) and eases use of
rbtree_lock(&some_lock) where &some_lock lives in a global internal map.

The verifier now tracks the lock id for rbtree_{lock,unlock} and
understands which other rbtree helpers may or must be called with that
lock held.

Logic is also added to ease this pattern:

  /* internal map */
  struct bpf_spin_lock lock SEC(".bss.private");

  /* In bpf prog */

  rbtree_lock(&lock);
  /* ... use all registers for other work */
  rbtree_unlock(&lock);

In the above example, the prog compiles down to something like:

  r1 = 0x12345
  call rbtree_lock
  /* begin other work: r1 is clobbered, e.g. r1 = some_unrelated_value,
     and the lock pointer is not saved elsewhere first
     ("r2 = r1" or similar never happens)
  */
  r1 = 0x12345
  call rbtree_unlock

Each time "r1 = 0x12345" happens, verifier's check_ld_imm will assign a
new id to the lock, which will result in it later complaining that
the incorrect lock is being unlocked when checking rbtree_unlock.

To help with this pattern, bpf_verifier_state now has a
maybe_active_spin_lock_addr field. If this field is nonzero and
bpf_verifier_state's active_spin_lock is also nonzero, then
maybe_active_spin_lock_addr contains the address of the active spin lock
(corresponding to active_spin_lock's id). This allows the verifier to
avoid assigning a new lock id when it sees the second "r1 = 0x12345",
since it can recognize that the address matches an existing lock id.
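
For reference, here is a rough sketch of the kind of program this
tracking is meant to accept. Helper names follow the example above (the
real helpers may carry a bpf_ prefix), and the map definition, node
struct, comparator signature, and alloc helper are assumptions based on
the rest of this series rather than a confirmed API:

  #include <vmlinux.h>
  #include <bpf/bpf_helpers.h>

  /* Sketch only: BPF_MAP_TYPE_RBTREE and the rbtree helper signatures
   * are assumed from earlier patches in this series and may not match
   * their final form.
   */
  struct node_data {
          struct rb_node node;
          __u32 key;
  };

  struct {
          __uint(type, BPF_MAP_TYPE_RBTREE);
          __type(value, struct node_data);
  } rbtree SEC(".maps");

  /* same .bss.private lock as in the example above */
  struct bpf_spin_lock lock SEC(".bss.private");

  /* Comparator callback for rbtree_add; its exact signature is an
   * assumption. It runs with the lock held and, per
   * in_rbtree_lock_required_cb(), is not required to release it.
   */
  static long node_less(void *a, void *b)
  {
          struct node_data *na = a, *nb = b;

          return na->key < nb->key;
  }

  SEC("tc")
  int insert_node(struct __sk_buff *ctx)
  {
          struct node_data *n;

          n = rbtree_alloc_node(&rbtree, sizeof(*n));
          if (!n)
                  return 0;
          n->key = 42;

          rbtree_lock(&lock);
          /* rbtree_add/find/remove may only be called while the lock is
           * held; their func_ids pass is_rbtree_lock_required_function()
           */
          rbtree_add(&rbtree, n, node_less);
          rbtree_unlock(&lock);
          return 0;
  }

If the compiler re-materializes &lock between rbtree_lock() and
rbtree_unlock(), the maybe_active_spin_lock_addr tracking lets the
second ld_imm64 reuse the existing lock id instead of minting a new
one.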

[ RFC Notes:

  * rbtree_process_spin_lock should be merged with the normal
    process_spin_lock, and likewise rbtree_lock with the normal lock
    helpers. They are left separate for now to highlight the logic
    differences.

  * The hacky maybe_active_spin_lock_addr logic could be improved by
    adding support for a custom .lock section, similar to the existing
    use of .bss.private. The new section type would function like
    .bss.private, but the verifier would know that locks in .lock are
    likely to be used like bpf_spin_lock(&lock), and could track the
    address of each such map value for deduping instead of just a
    single address. For multiple-lock scenarios this is probably
    necessary. (A hypothetical declaration is sketched after these
    notes.)
]
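
To make the second note concrete, a declaration under such a section
might look like the following; the .lock section does not exist in this
series and is purely hypothetical:

  /* hypothetical: a dedicated .lock section, handled like .bss.private */
  struct bpf_spin_lock tree_a_lock SEC(".lock");
  struct bpf_spin_lock tree_b_lock SEC(".lock");

The verifier could then record the address of every lock declared this
way and dedup ids per address, rather than tracking only a single
maybe_active_spin_lock_addr.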

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
---
 include/linux/bpf.h          |   2 +
 include/linux/bpf_verifier.h |   1 +
 kernel/bpf/rbtree.c          |   2 +-
 kernel/bpf/verifier.c        | 136 ++++++++++++++++++++++++++++++++---
 4 files changed, 129 insertions(+), 12 deletions(-)

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b4a44ffb0d6c..d6458aa7b79c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -497,6 +497,7 @@  enum bpf_return_type {
 	RET_PTR_TO_ALLOC_MEM,		/* returns a pointer to dynamically allocated memory */
 	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
 	RET_PTR_TO_BTF_ID,		/* returns a pointer to a btf_id */
+	RET_PTR_TO_SPIN_LOCK,		/* returns a pointer to a struct bpf_spin_lock */
 	__BPF_RET_TYPE_MAX,
 
 	/* Extended ret_types. */
@@ -612,6 +613,7 @@  enum bpf_reg_type {
 	PTR_TO_MEM,		 /* reg points to valid memory region */
 	PTR_TO_BUF,		 /* reg points to a read/write buffer */
 	PTR_TO_FUNC,		 /* reg points to a bpf program function */
+	PTR_TO_SPIN_LOCK,	 /* reg points to a struct bpf_spin_lock */
 	__BPF_REG_TYPE_MAX,
 
 	/* Extended reg_types. */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 9c017575c034..f81638844a4d 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -313,6 +313,7 @@  struct bpf_verifier_state {
 	u32 insn_idx;
 	u32 curframe;
 	u32 active_spin_lock;
+	void *maybe_active_spin_lock_addr;
 	bool speculative;
 
 	/* first and last insn idx of this verifier state */
diff --git a/kernel/bpf/rbtree.c b/kernel/bpf/rbtree.c
index c61662822511..0821e841a518 100644
--- a/kernel/bpf/rbtree.c
+++ b/kernel/bpf/rbtree.c
@@ -305,7 +305,7 @@  BPF_CALL_1(bpf_rbtree_get_lock, struct bpf_map *, map)
 const struct bpf_func_proto bpf_rbtree_get_lock_proto = {
 	.func = bpf_rbtree_get_lock,
 	.gpl_only = true,
-	.ret_type = RET_PTR_TO_MAP_VALUE,
+	.ret_type = RET_PTR_TO_SPIN_LOCK,
 	.arg1_type = ARG_CONST_MAP_PTR,
 };
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b9e5d87fe323..f8ba381f1327 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -452,8 +452,9 @@  static bool reg_type_not_null(enum bpf_reg_type type)
 
 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
 {
-	return reg->type == PTR_TO_MAP_VALUE &&
-		map_value_has_spin_lock(reg->map_ptr);
+	return (reg->type == PTR_TO_MAP_VALUE &&
+		map_value_has_spin_lock(reg->map_ptr)) ||
+		reg->type == PTR_TO_SPIN_LOCK;
 }
 
 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
@@ -507,6 +508,34 @@  static bool is_ptr_cast_function(enum bpf_func_id func_id)
 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
 }
 
+/* These functions can only be called when spinlock associated with rbtree
+ * is held. If they have a callback argument, that callback is not required
+ * to release active_spin_lock before exiting
+ */
+static bool is_rbtree_lock_required_function(enum bpf_func_id func_id)
+{
+	return func_id == BPF_FUNC_rbtree_add ||
+		func_id == BPF_FUNC_rbtree_remove ||
+		func_id == BPF_FUNC_rbtree_find ||
+		func_id == BPF_FUNC_rbtree_unlock;
+}
+
+/* These functions are OK to call when spinlock associated with rbtree
+ * is held.
+ */
+static bool is_rbtree_lock_ok_function(enum bpf_func_id func_id)
+{
+	return func_id == BPF_FUNC_rbtree_alloc_node ||
+		func_id == BPF_FUNC_rbtree_free_node ||
+		is_rbtree_lock_required_function(func_id);
+}
+
+static bool is_lock_allowed_function(enum bpf_func_id func_id)
+{
+	return func_id == BPF_FUNC_spin_unlock ||
+		is_rbtree_lock_ok_function(func_id);
+}
+
 static bool is_dynptr_ref_function(enum bpf_func_id func_id)
 {
 	return func_id == BPF_FUNC_dynptr_data;
@@ -579,6 +608,7 @@  static const char *reg_type_str(struct bpf_verifier_env *env,
 		[PTR_TO_BUF]		= "buf",
 		[PTR_TO_FUNC]		= "func",
 		[PTR_TO_MAP_KEY]	= "map_key",
+		[PTR_TO_SPIN_LOCK]	= "spin_lock",
 	};
 
 	if (type & PTR_MAYBE_NULL) {
@@ -1199,6 +1229,7 @@  static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 	dst_state->speculative = src->speculative;
 	dst_state->curframe = src->curframe;
 	dst_state->active_spin_lock = src->active_spin_lock;
+	dst_state->maybe_active_spin_lock_addr = src->maybe_active_spin_lock_addr;
 	dst_state->branches = src->branches;
 	dst_state->parent = src->parent;
 	dst_state->first_insn_idx = src->first_insn_idx;
@@ -5471,6 +5502,35 @@  static int process_spin_lock(struct bpf_verifier_env *env, int regno,
 			return -EINVAL;
 		}
 		cur->active_spin_lock = 0;
+		cur->maybe_active_spin_lock_addr = 0;
+	}
+	return 0;
+}
+
+static int rbtree_process_spin_lock(struct bpf_verifier_env *env, int regno,
+				    bool is_lock)
+{
+	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+	struct bpf_verifier_state *cur = env->cur_state;
+
+	if (is_lock) {
+		if (cur->active_spin_lock) {
+			verbose(env,
+				"Locking two bpf_spin_locks are not allowed\n");
+			return -EINVAL;
+		}
+		cur->active_spin_lock = reg->id;
+	} else {
+		if (!cur->active_spin_lock) {
+			verbose(env, "rbtree_spin_unlock without taking a lock\n");
+			return -EINVAL;
+		}
+		if (cur->active_spin_lock != reg->id) {
+			verbose(env, "rbtree_spin_unlock of different lock\n");
+			return -EINVAL;
+		}
+		cur->active_spin_lock = 0;
+		cur->maybe_active_spin_lock_addr = 0;
 	}
 	return 0;
 }
@@ -5686,12 +5746,18 @@  static const struct bpf_reg_types int_ptr_types = {
 	},
 };
 
+static const struct bpf_reg_types spin_lock_types = {
+	.types = {
+		PTR_TO_MAP_VALUE,
+		PTR_TO_SPIN_LOCK
+	},
+};
+
 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
 static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
-static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } };
 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
@@ -6057,8 +6123,12 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
 			if (process_spin_lock(env, regno, false))
 				return -EACCES;
-		} else if (meta->func_id == BPF_FUNC_rbtree_lock ||
-			   meta->func_id == BPF_FUNC_rbtree_unlock) { // Do nothing for now
+		} else if (meta->func_id == BPF_FUNC_rbtree_lock) {
+			if (rbtree_process_spin_lock(env, regno, true))
+				return -EACCES;
+		} else if (meta->func_id == BPF_FUNC_rbtree_unlock) {
+			if (rbtree_process_spin_lock(env, regno, false))
+				return -EACCES;
 		} else {
 			verbose(env, "verifier internal error\n");
 			return -EFAULT;
@@ -6993,6 +7063,29 @@  static int set_find_vma_callback_state(struct bpf_verifier_env *env,
 	return 0;
 }
 
+/* Are we currently verifying the callback for a rbtree helper that must
+ * be called with lock held? If so, no need to complain about unreleased
+ * lock
+ */
+static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
+{
+	struct bpf_verifier_state *state = env->cur_state;
+	struct bpf_insn *insn = env->prog->insnsi;
+	struct bpf_func_state *callee;
+	int func_id;
+
+	if (!state->curframe)
+		return false;
+
+	callee = state->frame[state->curframe];
+
+	if (!callee->in_callback_fn)
+		return false;
+
+	func_id = insn[callee->callsite].imm;
+	return is_rbtree_lock_required_function(func_id);
+}
+
 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 {
 	struct bpf_verifier_state *state = env->cur_state;
@@ -7508,6 +7601,11 @@  static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			regs[BPF_REG_0].id = ++env->id_gen;
 		}
 		break;
+	case RET_PTR_TO_SPIN_LOCK:
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		regs[BPF_REG_0].type = PTR_TO_SPIN_LOCK | ret_flag;
+		regs[BPF_REG_0].id = ++env->id_gen;
+		break;
 	case RET_PTR_TO_SOCKET:
 		mark_reg_known_zero(env, regs, BPF_REG_0);
 		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
@@ -10366,6 +10464,20 @@  static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static unsigned int ld_imm_lock_id_gen(struct bpf_verifier_env *env,
+					     void *imm)
+{
+	struct bpf_verifier_state *cur = env->cur_state;
+
+	if (cur->active_spin_lock && cur->maybe_active_spin_lock_addr &&
+	    cur->maybe_active_spin_lock_addr == imm)
+		return cur->active_spin_lock;
+
+	if (!cur->active_spin_lock)
+		cur->maybe_active_spin_lock_addr = imm;
+	return ++env->id_gen;
+}
+
 /* verify BPF_LD_IMM64 instruction */
 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
@@ -10373,6 +10485,7 @@  static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	struct bpf_reg_state *regs = cur_regs(env);
 	struct bpf_reg_state *dst_reg;
 	struct bpf_map *map;
+	u64 imm;
 	int err;
 
 	if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -10390,7 +10503,7 @@  static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 
 	dst_reg = &regs[insn->dst_reg];
 	if (insn->src_reg == 0) {
-		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
+		imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
 
 		dst_reg->type = SCALAR_VALUE;
 		__mark_reg_known(&regs[insn->dst_reg], imm);
@@ -10441,13 +10554,14 @@  static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 
 	map = env->used_maps[aux->map_index];
 	dst_reg->map_ptr = map;
-
 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
 	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
 		dst_reg->type = PTR_TO_MAP_VALUE;
 		dst_reg->off = aux->map_off;
-		if (map_value_has_spin_lock(map))
-			dst_reg->id = ++env->id_gen;
+		if (map_value_has_spin_lock(map)) {
+			imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
+			dst_reg->id = ld_imm_lock_id_gen(env, (void *)imm);
+		}
 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
 		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
 		dst_reg->type = CONST_PTR_TO_MAP;
@@ -12432,7 +12546,7 @@  static int do_check(struct bpf_verifier_env *env)
 
 				if (env->cur_state->active_spin_lock &&
 				    (insn->src_reg == BPF_PSEUDO_CALL ||
-				     insn->imm != BPF_FUNC_spin_unlock)) {
+				     !is_lock_allowed_function(insn->imm))) {
 					verbose(env, "function calls are not allowed while holding a lock\n");
 					return -EINVAL;
 				}
@@ -12467,7 +12581,7 @@  static int do_check(struct bpf_verifier_env *env)
 					return -EINVAL;
 				}
 
-				if (env->cur_state->active_spin_lock) {
+				if (state->active_spin_lock && !in_rbtree_lock_required_cb(env)) {
 					verbose(env, "bpf_spin_unlock is missing\n");
 					return -EINVAL;
 				}