Message ID | 20241112082600.298035-3-song@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Make inode storage available to tracing prog | expand |
On Tue, Nov 12, 2024 at 12:25:56AM -0800, Song Liu wrote: > inode storage can be useful for non-LSM program. For example, file* tools > from bcc/libbpf-tools can use inode storage instead of hash map; fanotify > fastpath [1] can also use inode storage to store useful data. > > Make inode storage available for tracing program. Move bpf inode storage > from a security blob to inode->i_bpf_storage, and adjust related code > accordingly. > > [1] https://lore.kernel.org/linux-fsdevel/20241029231244.2834368-1-song@kernel.org/ > Signed-off-by: Song Liu <song@kernel.org> > --- > fs/inode.c | 1 + > include/linux/bpf.h | 9 +++++++++ > include/linux/bpf_lsm.h | 29 ----------------------------- > include/linux/fs.h | 4 ++++ > kernel/bpf/Makefile | 3 +-- > kernel/bpf/bpf_inode_storage.c | 32 +++++--------------------------- > kernel/bpf/bpf_lsm.c | 4 ---- > kernel/trace/bpf_trace.c | 4 ++++ > security/bpf/hooks.c | 6 ------ > 9 files changed, 24 insertions(+), 68 deletions(-) > > diff --git a/fs/inode.c b/fs/inode.c > index 8dabb224f941..3c679578169f 100644 > --- a/fs/inode.c > +++ b/fs/inode.c > @@ -286,6 +286,7 @@ static struct inode *alloc_inode(struct super_block *sb) > void __destroy_inode(struct inode *inode) > { > BUG_ON(inode_has_buffers(inode)); > + bpf_inode_storage_free(inode); > inode_detach_wb(inode); > security_inode_free(inode); > fsnotify_inode_delete(inode); > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 1b84613b10ac..0b31d2e74df6 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -2672,6 +2672,7 @@ struct bpf_link *bpf_link_by_id(u32 id); > const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id, > const struct bpf_prog *prog); > void bpf_task_storage_free(struct task_struct *task); > +void bpf_inode_storage_free(struct inode *inode); > void bpf_cgrp_storage_free(struct cgroup *cgroup); > bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); > const struct btf_func_model * > @@ -2942,6 +2943,10 @@ static inline void bpf_task_storage_free(struct task_struct *task) > { > } > > +static inline void bpf_inode_storage_free(struct inode *inode) > +{ > +} > + > static inline bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) > { > return false; > @@ -3305,6 +3310,10 @@ extern const struct bpf_func_proto bpf_task_storage_get_recur_proto; > extern const struct bpf_func_proto bpf_task_storage_get_proto; > extern const struct bpf_func_proto bpf_task_storage_delete_recur_proto; > extern const struct bpf_func_proto bpf_task_storage_delete_proto; > +extern const struct bpf_func_proto bpf_inode_storage_get_proto; > +extern const struct bpf_func_proto bpf_inode_storage_get_recur_proto; > +extern const struct bpf_func_proto bpf_inode_storage_delete_proto; > +extern const struct bpf_func_proto bpf_inode_storage_delete_recur_proto; > extern const struct bpf_func_proto bpf_for_each_map_elem_proto; > extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; > extern const struct bpf_func_proto bpf_sk_setsockopt_proto; > diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h > index aefcd6564251..a819c2f0a062 100644 > --- a/include/linux/bpf_lsm.h > +++ b/include/linux/bpf_lsm.h > @@ -19,31 +19,12 @@ > #include <linux/lsm_hook_defs.h> > #undef LSM_HOOK > > -struct bpf_storage_blob { > - struct bpf_local_storage __rcu *storage; > -}; > - > -extern struct lsm_blob_sizes bpf_lsm_blob_sizes; > - > int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, > const struct bpf_prog *prog); > > bool bpf_lsm_is_sleepable_hook(u32 btf_id); > bool bpf_lsm_is_trusted(const struct bpf_prog *prog); > > -static inline struct bpf_storage_blob *bpf_inode( > - const struct inode *inode) > -{ > - if (unlikely(!inode->i_security)) > - return NULL; > - > - return inode->i_security + bpf_lsm_blob_sizes.lbs_inode; > -} > - > -extern const struct bpf_func_proto bpf_inode_storage_get_proto; > -extern const struct bpf_func_proto bpf_inode_storage_delete_proto; > -void bpf_inode_storage_free(struct inode *inode); > - > void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func); > > int bpf_lsm_get_retval_range(const struct bpf_prog *prog, > @@ -66,16 +47,6 @@ static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, > return -EOPNOTSUPP; > } > > -static inline struct bpf_storage_blob *bpf_inode( > - const struct inode *inode) > -{ > - return NULL; > -} > - > -static inline void bpf_inode_storage_free(struct inode *inode) > -{ > -} > - > static inline void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, > bpf_func_t *bpf_func) > { > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 3559446279c1..479097e4dd5b 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -79,6 +79,7 @@ struct fs_context; > struct fs_parameter_spec; > struct fileattr; > struct iomap_ops; > +struct bpf_local_storage; > > extern void __init inode_init(void); > extern void __init inode_init_early(void); > @@ -648,6 +649,9 @@ struct inode { > #ifdef CONFIG_SECURITY > void *i_security; > #endif > +#ifdef CONFIG_BPF_SYSCALL > + struct bpf_local_storage __rcu *i_bpf_storage; > +#endif Sorry, we're not growing struct inode for this. It just keeps getting bigger. Last cycle we freed up 8 bytes to shrink it and we're not going to waste them on special-purpose stuff. We already NAKed someone else's pet field here.
Hi Christian, Thanks for your review. > On Nov 13, 2024, at 2:19 AM, Christian Brauner <brauner@kernel.org> wrote: [...] >> diff --git a/include/linux/fs.h b/include/linux/fs.h >> index 3559446279c1..479097e4dd5b 100644 >> --- a/include/linux/fs.h >> +++ b/include/linux/fs.h >> @@ -79,6 +79,7 @@ struct fs_context; >> struct fs_parameter_spec; >> struct fileattr; >> struct iomap_ops; >> +struct bpf_local_storage; >> >> extern void __init inode_init(void); >> extern void __init inode_init_early(void); >> @@ -648,6 +649,9 @@ struct inode { >> #ifdef CONFIG_SECURITY >> void *i_security; >> #endif >> +#ifdef CONFIG_BPF_SYSCALL >> + struct bpf_local_storage __rcu *i_bpf_storage; >> +#endif > > Sorry, we're not growing struct inode for this. It just keeps getting > bigger. Last cycle we freed up 8 bytes to shrink it and we're not going > to waste them on special-purpose stuff. We already NAKed someone else's > pet field here. Would it be acceptable if we union i_bpf_storage with i_security? IOW, if CONFIG_SECURITY is enabled, we will use existing logic. If CONFIG_SECURITY is not enabled, we will use i_bpf_storage. Given majority of default configs have CONFIG_SECURITY=y, this will not grow inode for most users. OTOH, users with CONFIG_SECURITY=n && CONFIG_BPF_SYSCALL=y combination can still use inode local storage in the tracing BPF programs. Does this make sense? Thanks, Song
On 11/13/2024 6:15 AM, Song Liu wrote: > Hi Christian, > > Thanks for your review. > >> On Nov 13, 2024, at 2:19 AM, Christian Brauner <brauner@kernel.org> wrote: > [...] > >>> diff --git a/include/linux/fs.h b/include/linux/fs.h >>> index 3559446279c1..479097e4dd5b 100644 >>> --- a/include/linux/fs.h >>> +++ b/include/linux/fs.h >>> @@ -79,6 +79,7 @@ struct fs_context; >>> struct fs_parameter_spec; >>> struct fileattr; >>> struct iomap_ops; >>> +struct bpf_local_storage; >>> >>> extern void __init inode_init(void); >>> extern void __init inode_init_early(void); >>> @@ -648,6 +649,9 @@ struct inode { >>> #ifdef CONFIG_SECURITY >>> void *i_security; >>> #endif >>> +#ifdef CONFIG_BPF_SYSCALL >>> + struct bpf_local_storage __rcu *i_bpf_storage; >>> +#endif >> Sorry, we're not growing struct inode for this. It just keeps getting >> bigger. Last cycle we freed up 8 bytes to shrink it and we're not going >> to waste them on special-purpose stuff. We already NAKed someone else's >> pet field here. > Would it be acceptable if we union i_bpf_storage with i_security? No! > IOW, if CONFIG_SECURITY is enabled, we will use existing logic. > If CONFIG_SECURITY is not enabled, we will use i_bpf_storage. > Given majority of default configs have CONFIG_SECURITY=y, this > will not grow inode for most users. OTOH, users with > CONFIG_SECURITY=n && CONFIG_BPF_SYSCALL=y combination can still > use inode local storage in the tracing BPF programs. > > Does this make sense? All it would take is one BPF programmer assuming that CONFIG_SECURITY=n is the norm for this to blow up spectacularly. > > Thanks, > Song >
On Wed, Nov 13, 2024 at 10:30 AM Casey Schaufler <casey@schaufler-ca.com> wrote: > > On 11/13/2024 6:15 AM, Song Liu wrote: > > Hi Christian, > > > > Thanks for your review. > > > >> On Nov 13, 2024, at 2:19 AM, Christian Brauner <brauner@kernel.org> wrote: > > [...] > > > >>> diff --git a/include/linux/fs.h b/include/linux/fs.h > >>> index 3559446279c1..479097e4dd5b 100644 > >>> --- a/include/linux/fs.h > >>> +++ b/include/linux/fs.h > >>> @@ -79,6 +79,7 @@ struct fs_context; > >>> struct fs_parameter_spec; > >>> struct fileattr; > >>> struct iomap_ops; > >>> +struct bpf_local_storage; > >>> > >>> extern void __init inode_init(void); > >>> extern void __init inode_init_early(void); > >>> @@ -648,6 +649,9 @@ struct inode { > >>> #ifdef CONFIG_SECURITY > >>> void *i_security; > >>> #endif > >>> +#ifdef CONFIG_BPF_SYSCALL > >>> + struct bpf_local_storage __rcu *i_bpf_storage; > >>> +#endif > >> Sorry, we're not growing struct inode for this. It just keeps getting > >> bigger. Last cycle we freed up 8 bytes to shrink it and we're not going > >> to waste them on special-purpose stuff. We already NAKed someone else's > >> pet field here. > > Would it be acceptable if we union i_bpf_storage with i_security? > > No! > > > IOW, if CONFIG_SECURITY is enabled, we will use existing logic. > > If CONFIG_SECURITY is not enabled, we will use i_bpf_storage. > > Given majority of default configs have CONFIG_SECURITY=y, this > > will not grow inode for most users. OTOH, users with > > CONFIG_SECURITY=n && CONFIG_BPF_SYSCALL=y combination can still > > use inode local storage in the tracing BPF programs. > > > > Does this make sense? > > All it would take is one BPF programmer assuming that CONFIG_SECURITY=n > is the norm for this to blow up spectacularly. I seriously don't understand what would blow up and how. Can you be more specific? Thanks, Song
diff --git a/fs/inode.c b/fs/inode.c index 8dabb224f941..3c679578169f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -286,6 +286,7 @@ static struct inode *alloc_inode(struct super_block *sb) void __destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); + bpf_inode_storage_free(inode); inode_detach_wb(inode); security_inode_free(inode); fsnotify_inode_delete(inode); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1b84613b10ac..0b31d2e74df6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2672,6 +2672,7 @@ struct bpf_link *bpf_link_by_id(u32 id); const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); void bpf_task_storage_free(struct task_struct *task); +void bpf_inode_storage_free(struct inode *inode); void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); const struct btf_func_model * @@ -2942,6 +2943,10 @@ static inline void bpf_task_storage_free(struct task_struct *task) { } +static inline void bpf_inode_storage_free(struct inode *inode) +{ +} + static inline bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) { return false; @@ -3305,6 +3310,10 @@ extern const struct bpf_func_proto bpf_task_storage_get_recur_proto; extern const struct bpf_func_proto bpf_task_storage_get_proto; extern const struct bpf_func_proto bpf_task_storage_delete_recur_proto; extern const struct bpf_func_proto bpf_task_storage_delete_proto; +extern const struct bpf_func_proto bpf_inode_storage_get_proto; +extern const struct bpf_func_proto bpf_inode_storage_get_recur_proto; +extern const struct bpf_func_proto bpf_inode_storage_delete_proto; +extern const struct bpf_func_proto bpf_inode_storage_delete_recur_proto; extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index aefcd6564251..a819c2f0a062 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -19,31 +19,12 @@ #include <linux/lsm_hook_defs.h> #undef LSM_HOOK -struct bpf_storage_blob { - struct bpf_local_storage __rcu *storage; -}; - -extern struct lsm_blob_sizes bpf_lsm_blob_sizes; - int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog); bool bpf_lsm_is_sleepable_hook(u32 btf_id); bool bpf_lsm_is_trusted(const struct bpf_prog *prog); -static inline struct bpf_storage_blob *bpf_inode( - const struct inode *inode) -{ - if (unlikely(!inode->i_security)) - return NULL; - - return inode->i_security + bpf_lsm_blob_sizes.lbs_inode; -} - -extern const struct bpf_func_proto bpf_inode_storage_get_proto; -extern const struct bpf_func_proto bpf_inode_storage_delete_proto; -void bpf_inode_storage_free(struct inode *inode); - void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func); int bpf_lsm_get_retval_range(const struct bpf_prog *prog, @@ -66,16 +47,6 @@ static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, return -EOPNOTSUPP; } -static inline struct bpf_storage_blob *bpf_inode( - const struct inode *inode) -{ - return NULL; -} - -static inline void bpf_inode_storage_free(struct inode *inode) -{ -} - static inline void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 3559446279c1..479097e4dd5b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -79,6 +79,7 @@ struct fs_context; struct fs_parameter_spec; struct fileattr; struct iomap_ops; +struct bpf_local_storage; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -648,6 +649,9 @@ struct inode { #ifdef CONFIG_SECURITY void *i_security; #endif +#ifdef CONFIG_BPF_SYSCALL + struct bpf_local_storage __rcu *i_bpf_storage; +#endif /* Stat data, not accessed from path walking */ unsigned long i_ino; diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 105328f0b9c0..73604c7130f1 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -10,8 +10,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o -obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o -obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o +obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o bpf_inode_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o obj-$(CONFIG_BPF_JIT) += trampoline.o obj-$(CONFIG_BPF_SYSCALL) += btf.o memalloc.o diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 44ccebc745e5..8d5a9bfe6643 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -21,16 +21,11 @@ DEFINE_BPF_STORAGE_CACHE(inode_cache); -static struct bpf_local_storage __rcu ** -inode_storage_ptr(void *owner) +static struct bpf_local_storage __rcu **inode_storage_ptr(void *owner) { struct inode *inode = owner; - struct bpf_storage_blob *bsb; - bsb = bpf_inode(inode); - if (!bsb) - return NULL; - return &bsb->storage; + return &inode->i_bpf_storage; } static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode, @@ -39,14 +34,9 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode, { struct bpf_local_storage *inode_storage; struct bpf_local_storage_map *smap; - struct bpf_storage_blob *bsb; - - bsb = bpf_inode(inode); - if (!bsb) - return NULL; inode_storage = - rcu_dereference_check(bsb->storage, bpf_rcu_lock_held()); + rcu_dereference_check(inode->i_bpf_storage, bpf_rcu_lock_held()); if (!inode_storage) return NULL; @@ -57,15 +47,10 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode, void bpf_inode_storage_free(struct inode *inode) { struct bpf_local_storage *local_storage; - struct bpf_storage_blob *bsb; - - bsb = bpf_inode(inode); - if (!bsb) - return; rcu_read_lock(); - local_storage = rcu_dereference(bsb->storage); + local_storage = rcu_dereference(inode->i_bpf_storage); if (!local_storage) { rcu_read_unlock(); return; @@ -95,8 +80,6 @@ static long bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, if (fd_empty(f)) return -EBADF; - if (!inode_storage_ptr(file_inode(fd_file(f)))) - return -EBADF; sdata = bpf_local_storage_update(file_inode(fd_file(f)), (struct bpf_local_storage_map *)map, @@ -136,12 +119,7 @@ BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) return (unsigned long)NULL; - /* explicitly check that the inode_storage_ptr is not - * NULL as inode_storage_lookup returns NULL in this case and - * bpf_local_storage_update expects the owner to have a - * valid storage pointer. - */ - if (!inode || !inode_storage_ptr(inode)) + if (!inode) return (unsigned long)NULL; sdata = inode_storage_lookup(inode, map, true); diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 3bc61628ab25..6b6e0730e515 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -231,10 +231,6 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } switch (func_id) { - case BPF_FUNC_inode_storage_get: - return &bpf_inode_storage_get_proto; - case BPF_FUNC_inode_storage_delete: - return &bpf_inode_storage_delete_proto; #ifdef CONFIG_NET case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_proto; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 949a3870946c..262bd101ea0b 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1553,6 +1553,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) if (bpf_prog_check_recur(prog)) return &bpf_task_storage_delete_recur_proto; return &bpf_task_storage_delete_proto; + case BPF_FUNC_inode_storage_get: + return &bpf_inode_storage_get_proto; + case BPF_FUNC_inode_storage_delete: + return &bpf_inode_storage_delete_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; case BPF_FUNC_snprintf: diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c index db759025abe1..67719a04bb0b 100644 --- a/security/bpf/hooks.c +++ b/security/bpf/hooks.c @@ -12,7 +12,6 @@ static struct security_hook_list bpf_lsm_hooks[] __ro_after_init = { LSM_HOOK_INIT(NAME, bpf_lsm_##NAME), #include <linux/lsm_hook_defs.h> #undef LSM_HOOK - LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free), }; static const struct lsm_id bpf_lsmid = { @@ -28,12 +27,7 @@ static int __init bpf_lsm_init(void) return 0; } -struct lsm_blob_sizes bpf_lsm_blob_sizes __ro_after_init = { - .lbs_inode = sizeof(struct bpf_storage_blob), -}; - DEFINE_LSM(bpf) = { .name = "bpf", .init = bpf_lsm_init, - .blobs = &bpf_lsm_blob_sizes };
inode storage can be useful for non-LSM program. For example, file* tools from bcc/libbpf-tools can use inode storage instead of hash map; fanotify fastpath [1] can also use inode storage to store useful data. Make inode storage available for tracing program. Move bpf inode storage from a security blob to inode->i_bpf_storage, and adjust related code accordingly. [1] https://lore.kernel.org/linux-fsdevel/20241029231244.2834368-1-song@kernel.org/ Signed-off-by: Song Liu <song@kernel.org> --- fs/inode.c | 1 + include/linux/bpf.h | 9 +++++++++ include/linux/bpf_lsm.h | 29 ----------------------------- include/linux/fs.h | 4 ++++ kernel/bpf/Makefile | 3 +-- kernel/bpf/bpf_inode_storage.c | 32 +++++--------------------------- kernel/bpf/bpf_lsm.c | 4 ---- kernel/trace/bpf_trace.c | 4 ++++ security/bpf/hooks.c | 6 ------ 9 files changed, 24 insertions(+), 68 deletions(-)