@@ -21,6 +21,31 @@
DEFINE_BPF_STORAGE_CACHE(inode_cache);
+/*
+ * Per-CPU counter of inode storage operations currently in progress on this
+ * CPU. It is incremented by bpf_inode_storage_lock()/_trylock() and
+ * decremented by bpf_inode_storage_unlock(); a value other than 1 after the
+ * increment in _trylock() means another operation is already active here.
+ */
+static DEFINE_PER_CPU(int, bpf_inode_storage_busy);
+
+/*
+ * Unconditionally mark inode storage as busy on the current CPU.
+ *
+ * Migration is disabled first so that the per-CPU increment and the matching
+ * decrement in bpf_inode_storage_unlock() act on the same CPU's counter.
+ */
+static void bpf_inode_storage_lock(void)
+{
+ migrate_disable();
+ this_cpu_inc(bpf_inode_storage_busy);
+}
+
+/*
+ * Release the busy mark taken by bpf_inode_storage_lock() or by a successful
+ * bpf_inode_storage_trylock(): drop the per-CPU counter, then re-enable
+ * migration (the reverse of the acquire order).
+ */
+static void bpf_inode_storage_unlock(void)
+{
+ this_cpu_dec(bpf_inode_storage_busy);
+ migrate_enable();
+}
+
+/*
+ * Try to mark inode storage as busy on the current CPU.
+ *
+ * Returns true when this caller is the only one active on this CPU (the
+ * counter became exactly 1); the caller must then release it with
+ * bpf_inode_storage_unlock(). Returns false when the counter was already
+ * held, in which case the increment and the migrate_disable() are undone
+ * before returning and the caller must not unlock.
+ */
+static bool bpf_inode_storage_trylock(void)
+{
+ migrate_disable();
+ if (likely(this_cpu_inc_return(bpf_inode_storage_busy) == 1))
+ return true;
+
+ /* Already busy on this CPU: back out our increment. */
+ this_cpu_dec(bpf_inode_storage_busy);
+ migrate_enable();
+ return false;
+}
+
static struct bpf_local_storage __rcu **inode_storage_ptr(void *owner)
{
struct inode *inode = owner;
@@ -56,7 +81,9 @@ void bpf_inode_storage_free(struct inode *inode)
return;
}
+ bpf_inode_storage_lock();
bpf_local_storage_destroy(local_storage);
+ bpf_inode_storage_unlock();
rcu_read_unlock();
}
@@ -68,7 +95,9 @@ static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)
if (fd_empty(f))
return ERR_PTR(-EBADF);
+ bpf_inode_storage_lock();
sdata = inode_storage_lookup(file_inode(fd_file(f)), map, true);
+ bpf_inode_storage_unlock();
return sdata ? sdata->data : NULL;
}
@@ -81,13 +110,16 @@ static long bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
if (fd_empty(f))
return -EBADF;
+ bpf_inode_storage_lock();
sdata = bpf_local_storage_update(file_inode(fd_file(f)),
(struct bpf_local_storage_map *)map,
value, map_flags, false, GFP_ATOMIC);
+ bpf_inode_storage_unlock();
return PTR_ERR_OR_ZERO(sdata);
}
-static int inode_storage_delete(struct inode *inode, struct bpf_map *map)
+static int inode_storage_delete(struct inode *inode, struct bpf_map *map,
+ bool nobusy)
{
struct bpf_local_storage_data *sdata;
@@ -95,6 +127,9 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map)
if (!sdata)
return -ENOENT;
+ if (!nobusy)
+ return -EBUSY;
+
bpf_selem_unlink(SELEM(sdata), false);
return 0;
@@ -102,55 +137,105 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map)
+/*
+ * Delete the element of @map attached to the inode behind a file descriptor.
+ *
+ * @key points to an int holding the file descriptor. Returns -EBADF when the
+ * descriptor does not resolve to a file, otherwise the result of
+ * inode_storage_delete(). The busy counter is taken unconditionally around
+ * the delete, so inode_storage_delete() is always called with nobusy == true.
+ */
static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
{
+ int err;
+
CLASS(fd_raw, f)(*(int *)key);
if (fd_empty(f))
return -EBADF;
- return inode_storage_delete(file_inode(fd_file(f)), map);
+ bpf_inode_storage_lock();
+ err = inode_storage_delete(file_inode(fd_file(f)), map, true);
+ bpf_inode_storage_unlock();
+ return err;
}
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
- void *, value, u64, flags, gfp_t, gfp_flags)
+/*
+ * Common body of the inode storage "get" helpers.
+ *
+ * Looks up the element of @map attached to @inode and returns a pointer to
+ * its value. If none exists and BPF_LOCAL_STORAGE_GET_F_CREATE is set in
+ * @flags, a new element is created via bpf_local_storage_update() with
+ * @value and @gfp_flags, but only while the inode has a non-zero i_count and
+ * the caller owns the per-CPU busy counter.
+ *
+ * @nobusy: true when the caller successfully took bpf_inode_storage_busy
+ * (lock or trylock). When false, this call is nested inside another inode
+ * storage operation on the same CPU, so it must not create an element and
+ * must not ask the lookup to lock. NOTE(review): the third argument of
+ * inode_storage_lookup() is assumed to be the cache-and-lock switch, as in
+ * the task storage counterpart - confirm against its definition.
+ *
+ * Returns the value pointer, or NULL if @inode is NULL, no element exists
+ * and none may be created, or creation failed.
+ */
+static void *__bpf_inode_storage_get(struct bpf_map *map, struct inode *inode,
+ void *value, u64 flags, gfp_t gfp_flags, bool nobusy)
{
struct bpf_local_storage_data *sdata;
- WARN_ON_ONCE(!bpf_rcu_lock_held());
- if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
- return (unsigned long)NULL;
-
+ /* explicitly check that the inode is not NULL */
if (!inode)
- return (unsigned long)NULL;
+ return NULL;
- sdata = inode_storage_lookup(inode, map, true);
+ sdata = inode_storage_lookup(inode, map, nobusy);
if (sdata)
- return (unsigned long)sdata->data;
+ return sdata->data;
- /* This helper must only called from where the inode is guaranteed
- * to have a refcount and cannot be freed.
- */
- if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+ /*
+ * Only allocate new storage when the inode is refcounted and we own the
+ * busy counter; a nested (busy) caller must not enter the update path.
+ */
+ if (atomic_read(&inode->i_count) &&
+ (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) {
sdata = bpf_local_storage_update(
inode, (struct bpf_local_storage_map *)map, value,
BPF_NOEXIST, false, gfp_flags);
- return IS_ERR(sdata) ? (unsigned long)NULL :
- (unsigned long)sdata->data;
+ return IS_ERR(sdata) ? NULL : sdata->data;
}
- return (unsigned long)NULL;
+ return NULL;
+}
+
+/*
+ * Recursion-aware variant of bpf_inode_storage_get(); bpf_tracing_func_proto()
+ * selects it when bpf_prog_check_recur(prog) is true.
+ *
+ * Rejects any flag other than BPF_LOCAL_STORAGE_GET_F_CREATE by returning
+ * NULL. Otherwise it only *tries* to take the per-CPU busy counter and
+ * forwards the outcome to __bpf_inode_storage_get() as @nobusy; the counter
+ * is released only if the trylock succeeded.
+ *
+ * *gfp_flags* is a hidden argument provided by the verifier.
+ */
+BPF_CALL_5(bpf_inode_storage_get_recur, struct bpf_map *, map, struct inode *, inode,
+ void *, value, u64, flags, gfp_t, gfp_flags)
+{
+ bool nobusy;
+ void *data;
+
+ WARN_ON_ONCE(!bpf_rcu_lock_held());
+ if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
+ return (unsigned long)NULL;
+
+ /* Must come after the early return above, or the count would leak. */
+ nobusy = bpf_inode_storage_trylock();
+ data = __bpf_inode_storage_get(map, inode, value, flags, gfp_flags, nobusy);
+ if (nobusy)
+ bpf_inode_storage_unlock();
+ return (unsigned long)data;
+}
+
+/*
+ * bpf_inode_storage_get() helper for callers that take the per-CPU busy
+ * counter unconditionally (no trylock).
+ *
+ * Rejects any flag other than BPF_LOCAL_STORAGE_GET_F_CREATE by returning
+ * NULL; otherwise returns whatever __bpf_inode_storage_get() yields when
+ * called with nobusy == true, cast to unsigned long.
+ *
+ * *gfp_flags* is a hidden argument provided by the verifier.
+ */
+BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
+ void *, value, u64, flags, gfp_t, gfp_flags)
+{
+ void *data;
+
+ WARN_ON_ONCE(!bpf_rcu_lock_held());
+ if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
+ return (unsigned long)NULL;
+
+ bpf_inode_storage_lock();
+ data = __bpf_inode_storage_get(map, inode, value, flags, gfp_flags, true);
+ bpf_inode_storage_unlock();
+ return (unsigned long)data;
+}
+
+/*
+ * Recursion-aware variant of bpf_inode_storage_delete(); bpf_tracing_func_proto()
+ * selects it when bpf_prog_check_recur(prog) is true.
+ *
+ * Returns -EINVAL for a NULL @inode. Otherwise it tries to take the per-CPU
+ * busy counter and passes the outcome to inode_storage_delete(), which
+ * returns -ENOENT when no element exists and -EBUSY when an element exists
+ * but the trylock failed. The counter is released only if it was taken.
+ */
+BPF_CALL_2(bpf_inode_storage_delete_recur, struct bpf_map *, map, struct inode *, inode)
+{
+ bool nobusy;
+ int ret;
+
+ WARN_ON_ONCE(!bpf_rcu_lock_held());
+ if (!inode)
+ return -EINVAL;
+
+ nobusy = bpf_inode_storage_trylock();
+ ret = inode_storage_delete(inode, map, nobusy);
+ if (nobusy)
+ bpf_inode_storage_unlock();
+ return ret;
}
-BPF_CALL_2(bpf_inode_storage_delete,
- struct bpf_map *, map, struct inode *, inode)
+/*
+ * bpf_inode_storage_delete() helper for callers that take the per-CPU busy
+ * counter unconditionally (no trylock).
+ *
+ * Returns -EINVAL for a NULL @inode, otherwise the result of
+ * inode_storage_delete() called with nobusy == true.
+ */
+BPF_CALL_2(bpf_inode_storage_delete, struct bpf_map *, map, struct inode *, inode)
{
+ int ret;
+
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!inode)
return -EINVAL;
- /* This helper must only called from where the inode is guaranteed
- * to have a refcount and cannot be freed.
- */
- return inode_storage_delete(inode, map);
+ bpf_inode_storage_lock();
+ ret = inode_storage_delete(inode, map, true);
+ bpf_inode_storage_unlock();
+ return ret;
}
static int notsupp_get_next_key(struct bpf_map *map, void *key,
@@ -186,6 +271,17 @@ const struct bpf_map_ops inode_storage_map_ops = {
BTF_ID_LIST_SINGLE(bpf_inode_storage_btf_ids, struct, inode)
+/*
+ * Helper prototype for bpf_inode_storage_get_recur(): takes a map pointer,
+ * an inode BTF pointer that may be NULL, an optional initial value and a
+ * flags word; returns a map value pointer or NULL. The fifth BPF_CALL
+ * argument (gfp_flags) has no arg type here - it is the hidden argument
+ * noted on the helper itself.
+ */
+const struct bpf_func_proto bpf_inode_storage_get_recur_proto = {
+ .func = bpf_inode_storage_get_recur,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
+ .arg2_btf_id = &bpf_inode_storage_btf_ids[0],
+ .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg4_type = ARG_ANYTHING,
+};
+
const struct bpf_func_proto bpf_inode_storage_get_proto = {
.func = bpf_inode_storage_get,
.gpl_only = false,
@@ -197,6 +293,15 @@ const struct bpf_func_proto bpf_inode_storage_get_proto = {
.arg4_type = ARG_ANYTHING,
};
+/*
+ * Helper prototype for bpf_inode_storage_delete_recur(): takes a map pointer
+ * and an inode BTF pointer that may be NULL; returns an integer status.
+ */
+const struct bpf_func_proto bpf_inode_storage_delete_recur_proto = {
+ .func = bpf_inode_storage_delete_recur,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
+ .arg2_btf_id = &bpf_inode_storage_btf_ids[0],
+};
+
const struct bpf_func_proto bpf_inode_storage_delete_proto = {
.func = bpf_inode_storage_delete,
.gpl_only = false,
@@ -1554,8 +1554,12 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_task_storage_delete_recur_proto;
return &bpf_task_storage_delete_proto;
case BPF_FUNC_inode_storage_get:
+ if (bpf_prog_check_recur(prog))
+ return &bpf_inode_storage_get_recur_proto;
return &bpf_inode_storage_get_proto;
case BPF_FUNC_inode_storage_delete:
+ if (bpf_prog_check_recur(prog))
+ return &bpf_inode_storage_delete_recur_proto;
return &bpf_inode_storage_delete_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
This logic is similar to the recursion prevention logic for task local storage: BPF programs on LSM hooks lock bpf_inode_storage_busy; BPF tracing programs try to lock bpf_inode_storage_busy, and may return -EBUSY if something else has already locked bpf_inode_storage_busy on the same CPU.

Signed-off-by: Song Liu <song@kernel.org>
---
 kernel/bpf/bpf_inode_storage.c | 153 +++++++++++++++++++++++++++------
 kernel/trace/bpf_trace.c | 4 +
 2 files changed, 133 insertions(+), 24 deletions(-)