
[v3,bpf-next,02/14] bpf: refactor BPF_PROG_RUN_ARRAY family of macros into functions

Message ID 20210730053413.1090371-3-andrii@kernel.org (mailing list archive)
State Superseded
Delegated to: BPF
Series: BPF perf link and user-provided bpf_cookie

Checks

Context Check Description
netdev/cover_letter success
netdev/fixes_present success
netdev/patch_count success
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success
netdev/cc_maintainers warning 7 maintainers not CCed: netdev@vger.kernel.org kpsingh@kernel.org mingo@redhat.com kafai@fb.com rostedt@goodmis.org john.fastabend@gmail.com songliubraving@fb.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 11808 this patch: 11808
netdev/kdoc success Errors and warnings before: 3 this patch: 3
netdev/verify_fixes success
netdev/checkpatch fail CHECK: Alignment should match open parenthesis; CHECK: Please don't use multiple blank lines; ERROR: code indent should use tabs where possible; WARNING: line length of 81 exceeds 80 columns; WARNING: line length of 83 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 11310 this patch: 11310
netdev/header_inline success

Commit Message

Andrii Nakryiko July 30, 2021, 5:34 a.m. UTC
Similar to BPF_PROG_RUN, turn BPF_PROG_RUN_ARRAY macros into proper functions
with all the same readability and maintainability benefits. Making them into
functions required shuffling around bpf_set_run_ctx/bpf_reset_run_ctx
functions. Also, explicitly specifying the type of the BPF prog run callback
required adjusting __bpf_prog_run_save_cb() to accept const void *, which is
cast internally to const struct sk_buff *.
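
For context on the const void * choice (an editorial illustration, not part of
the patch): C function-pointer types must match exactly, so an skb-typed run
helper cannot be passed where a bpf_prog_run_fn is expected; the helper takes
const void * and recovers the concrete type internally. A minimal sketch
(run_skb_prog is a made-up name; bpf_prog_run is the function that BPF_PROG_RUN
was converted into earlier in this series):

typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);

static u32 run_skb_prog(const struct bpf_prog *prog, const void *ctx)
{
	const struct sk_buff *skb = ctx;	/* recover the concrete type */

	return bpf_prog_run(prog, skb);
}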

Further, split out a cgroup-specific BPF_PROG_RUN_ARRAY_CG and
BPF_PROG_RUN_ARRAY_CG_FLAGS from the more generic BPF_PROG_RUN_ARRAY due to
the differences in bpf_run_ctx used for those two different use cases.

I think BPF_PROG_RUN_ARRAY_CG would benefit from further refactoring to accept
struct cgroup and enum bpf_attach_type instead of bpf_prog_array, fetching
cgrp->bpf.effective[type] and RCU-dereferencing it internally. But that would
require including include/linux/cgroup-defs.h, which I wasn't sure is OK with
everyone.
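
To make that concrete, a rough sketch of what such a follow-up could look like
(hypothetical, not part of this patch; the function name is made up, and it
would pull in include/linux/cgroup-defs.h for the struct cgroup layout):

static __always_inline u32
bpf_cgroup_prog_run_array(const struct cgroup *cgrp, enum bpf_attach_type type,
			  const void *ctx, bpf_prog_run_fn run_prog)
{
	/* fetch the effective array here instead of at every call site;
	 * BPF_PROG_RUN_ARRAY_CG still does the rcu_dereference() internally
	 */
	return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], ctx, run_prog);
}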

The remaining generic BPF_PROG_RUN_ARRAY function will be extended to pass
through a user-provided context value in the next patch.

Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 include/linux/bpf.h      | 187 +++++++++++++++++++++++----------------
 include/linux/filter.h   |   5 +-
 kernel/bpf/cgroup.c      |  32 +++----
 kernel/trace/bpf_trace.c |   2 +-
 4 files changed, 132 insertions(+), 94 deletions(-)

Comments

Daniel Borkmann Aug. 9, 2021, 11 p.m. UTC | #1
On 7/30/21 7:34 AM, Andrii Nakryiko wrote:
> Similar to BPF_PROG_RUN, turn BPF_PROG_RUN_ARRAY macros into proper functions
> with all the same readability and maintainability benefits. Making them into
> functions required shuffling around bpf_set_run_ctx/bpf_reset_run_ctx
> functions. Also, explicitly specifying the type of the BPF prog run callback
> required adjusting __bpf_prog_run_save_cb() to accept const void *, casted
> internally to const struct sk_buff.
> 
> Further, split out a cgroup-specific BPF_PROG_RUN_ARRAY_CG and
> BPF_PROG_RUN_ARRAY_CG_FLAGS from the more generic BPF_PROG_RUN_ARRAY due to
> the differences in bpf_run_ctx used for those two different use cases.
> 
> I think BPF_PROG_RUN_ARRAY_CG would benefit from further refactoring to accept
> struct cgroup and enum bpf_attach_type instead of bpf_prog_array, fetching
> cgrp->bpf.effective[type] and RCU-dereferencing it internally. But that
> required including include/linux/cgroup-defs.h, which I wasn't sure is ok with
> everyone.
> 
> The remaining generic BPF_PROG_RUN_ARRAY function will be extended to
> pass-through user-provided context value in the next patch.
> 
> Acked-by: Yonghong Song <yhs@fb.com>
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>   include/linux/bpf.h      | 187 +++++++++++++++++++++++----------------
>   include/linux/filter.h   |   5 +-
>   kernel/bpf/cgroup.c      |  32 +++----
>   kernel/trace/bpf_trace.c |   2 +-
>   4 files changed, 132 insertions(+), 94 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index c8cc09013210..9c44b56b698f 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1146,67 +1146,124 @@ struct bpf_run_ctx {};
>   
>   struct bpf_cg_run_ctx {
>   	struct bpf_run_ctx run_ctx;
> -	struct bpf_prog_array_item *prog_item;
> +	const struct bpf_prog_array_item *prog_item;
>   };
>   
> +#ifdef CONFIG_BPF_SYSCALL
> +static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
> +{
> +	struct bpf_run_ctx *old_ctx;
> +
> +	old_ctx = current->bpf_ctx;
> +	current->bpf_ctx = new_ctx;
> +	return old_ctx;
> +}
> +
> +static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
> +{
> +	current->bpf_ctx = old_ctx;
> +}
> +#else /* CONFIG_BPF_SYSCALL */
> +static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
> +{
> +	return NULL;
> +}
> +
> +static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
> +{
> +}
> +#endif /* CONFIG_BPF_SYSCALL */

nit, but either is fine..:

static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
{
	struct bpf_run_ctx *old_ctx = NULL;

#ifdef CONFIG_BPF_SYSCALL
	old_ctx = current->bpf_ctx;
	current->bpf_ctx = new_ctx;
#endif
	return old_ctx;
}

static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
{
#ifdef CONFIG_BPF_SYSCALL
	current->bpf_ctx = old_ctx;
#endif
}

>   /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
>   #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE			(1 << 0)
>   /* BPF program asks to set CN on the packet. */
>   #define BPF_RET_SET_CN						(1 << 0)
>   
> -#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)		\
> -	({								\
> -		struct bpf_prog_array_item *_item;			\
> -		struct bpf_prog *_prog;					\
> -		struct bpf_prog_array *_array;				\
> -		struct bpf_run_ctx *old_run_ctx;			\
> -		struct bpf_cg_run_ctx run_ctx;				\
> -		u32 _ret = 1;						\
> -		u32 func_ret;						\
> -		migrate_disable();					\
> -		rcu_read_lock();					\
> -		_array = rcu_dereference(array);			\
> -		_item = &_array->items[0];				\
> -		old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);	\
> -		while ((_prog = READ_ONCE(_item->prog))) {		\
> -			run_ctx.prog_item = _item;			\
> -			func_ret = func(_prog, ctx);			\
> -			_ret &= (func_ret & 1);				\
> -			*(ret_flags) |= (func_ret >> 1);		\
> -			_item++;					\
> -		}							\
> -		bpf_reset_run_ctx(old_run_ctx);				\
> -		rcu_read_unlock();					\
> -		migrate_enable();					\
> -		_ret;							\
> -	 })
> -
> -#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage)	\
> -	({						\
> -		struct bpf_prog_array_item *_item;	\
> -		struct bpf_prog *_prog;			\
> -		struct bpf_prog_array *_array;		\
> -		struct bpf_run_ctx *old_run_ctx;	\
> -		struct bpf_cg_run_ctx run_ctx;		\
> -		u32 _ret = 1;				\
> -		migrate_disable();			\
> -		rcu_read_lock();			\
> -		_array = rcu_dereference(array);	\
> -		if (unlikely(check_non_null && !_array))\
> -			goto _out;			\
> -		_item = &_array->items[0];		\
> -		old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\
> -		while ((_prog = READ_ONCE(_item->prog))) {	\
> -			run_ctx.prog_item = _item;	\
> -			_ret &= func(_prog, ctx);	\
> -			_item++;			\
> -		}					\
> -		bpf_reset_run_ctx(old_run_ctx);		\
> -_out:							\
> -		rcu_read_unlock();			\
> -		migrate_enable();			\
> -		_ret;					\
> -	 })
> +typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
> +
> +static __always_inline u32
> +BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
> +			    const void *ctx, bpf_prog_run_fn run_prog,
> +			    u32 *ret_flags)
> +{
> +	const struct bpf_prog_array_item *item;
> +	const struct bpf_prog *prog;
> +	const struct bpf_prog_array *array;
> +	struct bpf_run_ctx *old_run_ctx;
> +	struct bpf_cg_run_ctx run_ctx;
> +	u32 ret = 1;
> +	u32 func_ret;
> +
> +	migrate_disable();
> +	rcu_read_lock();
> +	array = rcu_dereference(array_rcu);
> +	item = &array->items[0];
> +	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
> +	while ((prog = READ_ONCE(item->prog))) {
> +		run_ctx.prog_item = item;
> +		func_ret = run_prog(prog, ctx);
> +		ret &= (func_ret & 1);
> +		*(ret_flags) |= (func_ret >> 1);
> +		item++;
> +	}
> +	bpf_reset_run_ctx(old_run_ctx);
> +	rcu_read_unlock();
> +	migrate_enable();
> +	return ret;
> +}
> +
> +static __always_inline u32
> +BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
> +		      const void *ctx, bpf_prog_run_fn run_prog)
> +{
> +	const struct bpf_prog_array_item *item;
> +	const struct bpf_prog *prog;
> +	const struct bpf_prog_array *array;
> +	struct bpf_run_ctx *old_run_ctx;
> +	struct bpf_cg_run_ctx run_ctx;
> +	u32 ret = 1;
> +
> +	migrate_disable();
> +	rcu_read_lock();
> +	array = rcu_dereference(array_rcu);
> +	item = &array->items[0];
> +	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
> +	while ((prog = READ_ONCE(item->prog))) {
> +		run_ctx.prog_item = item;
> +		ret &= run_prog(prog, ctx);
> +		item++;
> +	}
> +	bpf_reset_run_ctx(old_run_ctx);
> +	rcu_read_unlock();
> +	migrate_enable();
> +	return ret;
> +}
> +
> +static __always_inline u32
> +BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
> +		   const void *ctx, bpf_prog_run_fn run_prog)
> +{
> +	const struct bpf_prog_array_item *item;
> +	const struct bpf_prog *prog;
> +	const struct bpf_prog_array *array;
> +	u32 ret = 1;
> +
> +	migrate_disable();
> +	rcu_read_lock();
> +	array = rcu_dereference(array_rcu);
> +	if (unlikely(!array))
> +		goto out;
> +	item = &array->items[0];
> +	while ((prog = READ_ONCE(item->prog))) {
> +		ret &= run_prog(prog, ctx);
> +		item++;
> +	}
> +out:
> +	rcu_read_unlock();
> +	migrate_enable();
> +	return ret;
> +}

Is there any way we could consolidate the above somewhat further and have things
optimized out at compilation time, e.g. when const args are null/non-null? :/

>   /* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
>    * so BPF programs can request cwr for TCP packets.
> @@ -1235,7 +1292,7 @@ _out:							\
>   		u32 _flags = 0;				\
[...]
Andrii Nakryiko Aug. 10, 2021, 12:36 a.m. UTC | #2
On Mon, Aug 9, 2021 at 4:00 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 7/30/21 7:34 AM, Andrii Nakryiko wrote:
> > Similar to BPF_PROG_RUN, turn BPF_PROG_RUN_ARRAY macros into proper functions
> > with all the same readability and maintainability benefits. Making them into
> > functions required shuffling around bpf_set_run_ctx/bpf_reset_run_ctx
> > functions. Also, explicitly specifying the type of the BPF prog run callback
> > required adjusting __bpf_prog_run_save_cb() to accept const void *, casted
> > internally to const struct sk_buff.
> >
> > Further, split out a cgroup-specific BPF_PROG_RUN_ARRAY_CG and
> > BPF_PROG_RUN_ARRAY_CG_FLAGS from the more generic BPF_PROG_RUN_ARRAY due to
> > the differences in bpf_run_ctx used for those two different use cases.
> >
> > I think BPF_PROG_RUN_ARRAY_CG would benefit from further refactoring to accept
> > struct cgroup and enum bpf_attach_type instead of bpf_prog_array, fetching
> > cgrp->bpf.effective[type] and RCU-dereferencing it internally. But that
> > required including include/linux/cgroup-defs.h, which I wasn't sure is ok with
> > everyone.
> >
> > The remaining generic BPF_PROG_RUN_ARRAY function will be extended to
> > pass-through user-provided context value in the next patch.
> >
> > Acked-by: Yonghong Song <yhs@fb.com>
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> >   include/linux/bpf.h      | 187 +++++++++++++++++++++++----------------
> >   include/linux/filter.h   |   5 +-
> >   kernel/bpf/cgroup.c      |  32 +++----
> >   kernel/trace/bpf_trace.c |   2 +-
> >   4 files changed, 132 insertions(+), 94 deletions(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index c8cc09013210..9c44b56b698f 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -1146,67 +1146,124 @@ struct bpf_run_ctx {};
> >
> >   struct bpf_cg_run_ctx {
> >       struct bpf_run_ctx run_ctx;
> > -     struct bpf_prog_array_item *prog_item;
> > +     const struct bpf_prog_array_item *prog_item;
> >   };
> >
> > +#ifdef CONFIG_BPF_SYSCALL
> > +static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
> > +{
> > +     struct bpf_run_ctx *old_ctx;
> > +
> > +     old_ctx = current->bpf_ctx;
> > +     current->bpf_ctx = new_ctx;
> > +     return old_ctx;
> > +}
> > +
> > +static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
> > +{
> > +     current->bpf_ctx = old_ctx;
> > +}
> > +#else /* CONFIG_BPF_SYSCALL */
> > +static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
> > +{
> > +     return NULL;
> > +}
> > +
> > +static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
> > +{
> > +}
> > +#endif /* CONFIG_BPF_SYSCALL */
>
> nit, but either is fine..:
>
> static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
> {
>         struct bpf_run_ctx *old_ctx = NULL;
>
> #ifdef CONFIG_BPF_SYSCALL
>         old_ctx = current->bpf_ctx;
>         current->bpf_ctx = new_ctx;
> #endif
>         return old_ctx;
> }
>
> static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
> {
> #ifdef CONFIG_BPF_SYSCALL
>         current->bpf_ctx = old_ctx;
> #endif
> }

sure, I don't mind

>
> >   /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
> >   #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE                        (1 << 0)
> >   /* BPF program asks to set CN on the packet. */
> >   #define BPF_RET_SET_CN                                              (1 << 0)
> >
> > -#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)                \
> > -     ({                                                              \
> > -             struct bpf_prog_array_item *_item;                      \
> > -             struct bpf_prog *_prog;                                 \
> > -             struct bpf_prog_array *_array;                          \
> > -             struct bpf_run_ctx *old_run_ctx;                        \
> > -             struct bpf_cg_run_ctx run_ctx;                          \
> > -             u32 _ret = 1;                                           \
> > -             u32 func_ret;                                           \
> > -             migrate_disable();                                      \
> > -             rcu_read_lock();                                        \
> > -             _array = rcu_dereference(array);                        \
> > -             _item = &_array->items[0];                              \
> > -             old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);        \
> > -             while ((_prog = READ_ONCE(_item->prog))) {              \
> > -                     run_ctx.prog_item = _item;                      \
> > -                     func_ret = func(_prog, ctx);                    \
> > -                     _ret &= (func_ret & 1);                         \
> > -                     *(ret_flags) |= (func_ret >> 1);                \
> > -                     _item++;                                        \
> > -             }                                                       \
> > -             bpf_reset_run_ctx(old_run_ctx);                         \
> > -             rcu_read_unlock();                                      \
> > -             migrate_enable();                                       \
> > -             _ret;                                                   \
> > -      })
> > -
> > -#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage)       \
> > -     ({                                              \
> > -             struct bpf_prog_array_item *_item;      \
> > -             struct bpf_prog *_prog;                 \
> > -             struct bpf_prog_array *_array;          \
> > -             struct bpf_run_ctx *old_run_ctx;        \
> > -             struct bpf_cg_run_ctx run_ctx;          \
> > -             u32 _ret = 1;                           \
> > -             migrate_disable();                      \
> > -             rcu_read_lock();                        \
> > -             _array = rcu_dereference(array);        \
> > -             if (unlikely(check_non_null && !_array))\
> > -                     goto _out;                      \
> > -             _item = &_array->items[0];              \
> > -             old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\
> > -             while ((_prog = READ_ONCE(_item->prog))) {      \
> > -                     run_ctx.prog_item = _item;      \
> > -                     _ret &= func(_prog, ctx);       \
> > -                     _item++;                        \
> > -             }                                       \
> > -             bpf_reset_run_ctx(old_run_ctx);         \
> > -_out:                                                        \
> > -             rcu_read_unlock();                      \
> > -             migrate_enable();                       \
> > -             _ret;                                   \
> > -      })
> > +typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
> > +
> > +static __always_inline u32
> > +BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
> > +                         const void *ctx, bpf_prog_run_fn run_prog,
> > +                         u32 *ret_flags)
> > +{
> > +     const struct bpf_prog_array_item *item;
> > +     const struct bpf_prog *prog;
> > +     const struct bpf_prog_array *array;
> > +     struct bpf_run_ctx *old_run_ctx;
> > +     struct bpf_cg_run_ctx run_ctx;
> > +     u32 ret = 1;
> > +     u32 func_ret;
> > +
> > +     migrate_disable();
> > +     rcu_read_lock();
> > +     array = rcu_dereference(array_rcu);
> > +     item = &array->items[0];
> > +     old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
> > +     while ((prog = READ_ONCE(item->prog))) {
> > +             run_ctx.prog_item = item;
> > +             func_ret = run_prog(prog, ctx);
> > +             ret &= (func_ret & 1);
> > +             *(ret_flags) |= (func_ret >> 1);
> > +             item++;
> > +     }
> > +     bpf_reset_run_ctx(old_run_ctx);
> > +     rcu_read_unlock();
> > +     migrate_enable();
> > +     return ret;
> > +}
> > +
> > +static __always_inline u32
> > +BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
> > +                   const void *ctx, bpf_prog_run_fn run_prog)
> > +{
> > +     const struct bpf_prog_array_item *item;
> > +     const struct bpf_prog *prog;
> > +     const struct bpf_prog_array *array;
> > +     struct bpf_run_ctx *old_run_ctx;
> > +     struct bpf_cg_run_ctx run_ctx;
> > +     u32 ret = 1;
> > +
> > +     migrate_disable();
> > +     rcu_read_lock();
> > +     array = rcu_dereference(array_rcu);
> > +     item = &array->items[0];
> > +     old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
> > +     while ((prog = READ_ONCE(item->prog))) {
> > +             run_ctx.prog_item = item;
> > +             ret &= run_prog(prog, ctx);
> > +             item++;
> > +     }
> > +     bpf_reset_run_ctx(old_run_ctx);
> > +     rcu_read_unlock();
> > +     migrate_enable();
> > +     return ret;
> > +}
> > +
> > +static __always_inline u32
> > +BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
> > +                const void *ctx, bpf_prog_run_fn run_prog)
> > +{
> > +     const struct bpf_prog_array_item *item;
> > +     const struct bpf_prog *prog;
> > +     const struct bpf_prog_array *array;
> > +     u32 ret = 1;
> > +
> > +     migrate_disable();
> > +     rcu_read_lock();
> > +     array = rcu_dereference(array_rcu);
> > +     if (unlikely(!array))
> > +             goto out;
> > +     item = &array->items[0];
> > +     while ((prog = READ_ONCE(item->prog))) {
> > +             ret &= run_prog(prog, ctx);
> > +             item++;
> > +     }
> > +out:
> > +     rcu_read_unlock();
> > +     migrate_enable();
> > +     return ret;
> > +}
>
> Is there any way we could consolidate the above somewhat further and have things
> optimized out at compilation time, e.g. when const args are null/non-null? :/

Do you mean like passing "bool check_for_null" as an extra argument and then do

if (check_for_null && unlikely(!array))
    goto out;

?
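
Spelled out, that consolidated variant would look roughly like this (a sketch
under that assumption, not code from this patch; __bpf_prog_run_array_cg is a
made-up name):

static __always_inline u32
__bpf_prog_run_array_cg(const struct bpf_prog_array __rcu *array_rcu,
			const void *ctx, bpf_prog_run_fn run_prog,
			const bool check_non_null, u32 *ret_flags)
{
	const struct bpf_prog_array_item *item;
	const struct bpf_prog *prog;
	const struct bpf_prog_array *array;
	struct bpf_run_ctx *old_run_ctx;
	struct bpf_cg_run_ctx run_ctx;
	u32 ret = 1;
	u32 func_ret;

	migrate_disable();
	rcu_read_lock();
	array = rcu_dereference(array_rcu);
	if (check_non_null && unlikely(!array))
		goto out;
	item = &array->items[0];
	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
	while ((prog = READ_ONCE(item->prog))) {
		run_ctx.prog_item = item;
		func_ret = run_prog(prog, ctx);
		ret &= (func_ret & 1);
		if (ret_flags)	/* folded away for constant-NULL callers */
			*ret_flags |= (func_ret >> 1);
		item++;
	}
	bpf_reset_run_ctx(old_run_ctx);
out:
	rcu_read_unlock();
	migrate_enable();
	return ret;
}

with BPF_PROG_RUN_ARRAY_CG{,_FLAGS} becoming thin wrappers that pass constant
arguments, so the extra branches get optimized out at each inline call site.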

I feel like that actually makes a bigger mess and just unnecessarily
obfuscates code, while saving just a few lines of straightforward
code. We didn't even do that for BPF_PROG_RUN_ARRAY_FLAGS vs
__BPF_PROG_RUN_ARRAY before, even though there are about 2 lines of
code differences.

But also in one of the next patches I'm adding perf-specific
bpf_perf_run_ctx (different from bpf_cg_run_ctx), so it will make
sharing the logic within these BPF_PROG_RUN_ARRAY*  even harder and
more convoluted.
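
(For what it's worth, that perf-side context would presumably be a separate
struct embedding struct bpf_run_ctx, something like the sketch below -- the
cookie field is a guess based on the series title, not code from this set:

struct bpf_perf_run_ctx {
	struct bpf_run_ctx run_ctx;
	u64 bpf_cookie;	/* hypothetical: the user-provided value this series adds */
};

so the cgroup-specific run_ctx setup in BPF_PROG_RUN_ARRAY_CG* can't simply be
reused there.)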

Or were you talking about some other aspect here?

>
> >   /* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
> >    * so BPF programs can request cwr for TCP packets.
> > @@ -1235,7 +1292,7 @@ _out:                                                   \
> >               u32 _flags = 0;                         \
> [...]

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c8cc09013210..9c44b56b698f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1146,67 +1146,124 @@  struct bpf_run_ctx {};
 
 struct bpf_cg_run_ctx {
 	struct bpf_run_ctx run_ctx;
-	struct bpf_prog_array_item *prog_item;
+	const struct bpf_prog_array_item *prog_item;
 };
 
+#ifdef CONFIG_BPF_SYSCALL
+static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
+{
+	struct bpf_run_ctx *old_ctx;
+
+	old_ctx = current->bpf_ctx;
+	current->bpf_ctx = new_ctx;
+	return old_ctx;
+}
+
+static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
+{
+	current->bpf_ctx = old_ctx;
+}
+#else /* CONFIG_BPF_SYSCALL */
+static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
+{
+	return NULL;
+}
+
+static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
+{
+}
+#endif /* CONFIG_BPF_SYSCALL */
+
+
 /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
 #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE			(1 << 0)
 /* BPF program asks to set CN on the packet. */
 #define BPF_RET_SET_CN						(1 << 0)
 
-#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)		\
-	({								\
-		struct bpf_prog_array_item *_item;			\
-		struct bpf_prog *_prog;					\
-		struct bpf_prog_array *_array;				\
-		struct bpf_run_ctx *old_run_ctx;			\
-		struct bpf_cg_run_ctx run_ctx;				\
-		u32 _ret = 1;						\
-		u32 func_ret;						\
-		migrate_disable();					\
-		rcu_read_lock();					\
-		_array = rcu_dereference(array);			\
-		_item = &_array->items[0];				\
-		old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);	\
-		while ((_prog = READ_ONCE(_item->prog))) {		\
-			run_ctx.prog_item = _item;			\
-			func_ret = func(_prog, ctx);			\
-			_ret &= (func_ret & 1);				\
-			*(ret_flags) |= (func_ret >> 1);		\
-			_item++;					\
-		}							\
-		bpf_reset_run_ctx(old_run_ctx);				\
-		rcu_read_unlock();					\
-		migrate_enable();					\
-		_ret;							\
-	 })
-
-#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage)	\
-	({						\
-		struct bpf_prog_array_item *_item;	\
-		struct bpf_prog *_prog;			\
-		struct bpf_prog_array *_array;		\
-		struct bpf_run_ctx *old_run_ctx;	\
-		struct bpf_cg_run_ctx run_ctx;		\
-		u32 _ret = 1;				\
-		migrate_disable();			\
-		rcu_read_lock();			\
-		_array = rcu_dereference(array);	\
-		if (unlikely(check_non_null && !_array))\
-			goto _out;			\
-		_item = &_array->items[0];		\
-		old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\
-		while ((_prog = READ_ONCE(_item->prog))) {	\
-			run_ctx.prog_item = _item;	\
-			_ret &= func(_prog, ctx);	\
-			_item++;			\
-		}					\
-		bpf_reset_run_ctx(old_run_ctx);		\
-_out:							\
-		rcu_read_unlock();			\
-		migrate_enable();			\
-		_ret;					\
-	 })
+typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
+			    const void *ctx, bpf_prog_run_fn run_prog,
+			    u32 *ret_flags)
+{
+	const struct bpf_prog_array_item *item;
+	const struct bpf_prog *prog;
+	const struct bpf_prog_array *array;
+	struct bpf_run_ctx *old_run_ctx;
+	struct bpf_cg_run_ctx run_ctx;
+	u32 ret = 1;
+	u32 func_ret;
+
+	migrate_disable();
+	rcu_read_lock();
+	array = rcu_dereference(array_rcu);
+	item = &array->items[0];
+	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+	while ((prog = READ_ONCE(item->prog))) {
+		run_ctx.prog_item = item;
+		func_ret = run_prog(prog, ctx);
+		ret &= (func_ret & 1);
+		*(ret_flags) |= (func_ret >> 1);
+		item++;
+	}
+	bpf_reset_run_ctx(old_run_ctx);
+	rcu_read_unlock();
+	migrate_enable();
+	return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
+		      const void *ctx, bpf_prog_run_fn run_prog)
+{
+	const struct bpf_prog_array_item *item;
+	const struct bpf_prog *prog;
+	const struct bpf_prog_array *array;
+	struct bpf_run_ctx *old_run_ctx;
+	struct bpf_cg_run_ctx run_ctx;
+	u32 ret = 1;
+
+	migrate_disable();
+	rcu_read_lock();
+	array = rcu_dereference(array_rcu);
+	item = &array->items[0];
+	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+	while ((prog = READ_ONCE(item->prog))) {
+		run_ctx.prog_item = item;
+		ret &= run_prog(prog, ctx);
+		item++;
+	}
+	bpf_reset_run_ctx(old_run_ctx);
+	rcu_read_unlock();
+	migrate_enable();
+	return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
+		   const void *ctx, bpf_prog_run_fn run_prog)
+{
+	const struct bpf_prog_array_item *item;
+	const struct bpf_prog *prog;
+	const struct bpf_prog_array *array;
+	u32 ret = 1;
+
+	migrate_disable();
+	rcu_read_lock();
+	array = rcu_dereference(array_rcu);
+	if (unlikely(!array))
+		goto out;
+	item = &array->items[0];
+	while ((prog = READ_ONCE(item->prog))) {
+		ret &= run_prog(prog, ctx);
+		item++;
+	}
+out:
+	rcu_read_unlock();
+	migrate_enable();
+	return ret;
+}
 
 /* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
  * so BPF programs can request cwr for TCP packets.
@@ -1235,7 +1292,7 @@  _out:							\
 		u32 _flags = 0;				\
 		bool _cn;				\
 		u32 _ret;				\
-		_ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags); \
+		_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
 		_cn = _flags & BPF_RET_SET_CN;		\
 		if (_ret)				\
 			_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);	\
@@ -1244,12 +1301,6 @@  _out:							\
 		_ret;					\
 	})
 
-#define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
-	__BPF_PROG_RUN_ARRAY(array, ctx, func, false, true)
-
-#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func)	\
-	__BPF_PROG_RUN_ARRAY(array, ctx, func, true, false)
-
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 extern struct mutex bpf_stats_enabled_mutex;
@@ -1284,20 +1335,6 @@  static inline void bpf_enable_instrumentation(void)
 	migrate_enable();
 }
 
-static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
-{
-	struct bpf_run_ctx *old_ctx;
-
-	old_ctx = current->bpf_ctx;
-	current->bpf_ctx = new_ctx;
-	return old_ctx;
-}
-
-static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
-{
-	current->bpf_ctx = old_ctx;
-}
-
 extern const struct file_operations bpf_map_fops;
 extern const struct file_operations bpf_prog_fops;
 extern const struct file_operations bpf_iter_fops;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 18518e321ce4..830b98999a68 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -711,7 +711,7 @@  static inline void bpf_restore_data_end(
 	cb->data_end = saved_data_end;
 }
 
-static inline u8 *bpf_skb_cb(struct sk_buff *skb)
+static inline u8 *bpf_skb_cb(const struct sk_buff *skb)
 {
 	/* eBPF programs may read/write skb->cb[] area to transfer meta
 	 * data between tail calls. Since this also needs to work with
@@ -732,8 +732,9 @@  static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 
 /* Must be invoked with migration disabled */
 static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
-					 struct sk_buff *skb)
+					 const void *ctx)
 {
+	const struct sk_buff *skb = ctx;
 	u8 *cb_data = bpf_skb_cb(skb);
 	u8 cb_saved[BPF_SKB_CB_LEN];
 	u32 res;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index b567ca46555c..dd2c0d4ae4f8 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1012,8 +1012,8 @@  int __cgroup_bpf_run_filter_skb(struct sock *sk,
 		ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
 			cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
 	} else {
-		ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-					  __bpf_prog_run_save_cb);
+		ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], skb,
+					    __bpf_prog_run_save_cb);
 		ret = (ret == 1 ? 0 : -EPERM);
 	}
 	bpf_restore_data_end(skb, saved_data_end);
@@ -1043,7 +1043,7 @@  int __cgroup_bpf_run_filter_sk(struct sock *sk,
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 	int ret;
 
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
 	return ret == 1 ? 0 : -EPERM;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
@@ -1090,8 +1090,8 @@  int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 	}
 
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	ret = BPF_PROG_RUN_ARRAY_FLAGS(cgrp->bpf.effective[type], &ctx,
-				       BPF_PROG_RUN, flags);
+	ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[type], &ctx,
+				          BPF_PROG_RUN, flags);
 
 	return ret == 1 ? 0 : -EPERM;
 }
@@ -1120,8 +1120,8 @@  int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 	int ret;
 
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
-				 BPF_PROG_RUN);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], sock_ops,
+				    BPF_PROG_RUN);
 	return ret == 1 ? 0 : -EPERM;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
@@ -1139,8 +1139,8 @@  int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 
 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
-	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
-				   BPF_PROG_RUN);
+	allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], &ctx,
+				      BPF_PROG_RUN);
 	rcu_read_unlock();
 
 	return !allow;
@@ -1271,7 +1271,7 @@  int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
 	rcu_read_unlock();
 
 	kfree(ctx.cur_val);
@@ -1385,8 +1385,8 @@  int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 	}
 
 	lock_sock(sk);
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
-				 &ctx, BPF_PROG_RUN);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
+				    &ctx, BPF_PROG_RUN);
 	release_sock(sk);
 
 	if (!ret) {
@@ -1495,8 +1495,8 @@  int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 	}
 
 	lock_sock(sk);
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
-				 &ctx, BPF_PROG_RUN);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
+				    &ctx, BPF_PROG_RUN);
 	release_sock(sk);
 
 	if (!ret) {
@@ -1556,8 +1556,8 @@  int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
 	 * be called if that data shouldn't be "exported".
 	 */
 
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
-				 &ctx, BPF_PROG_RUN);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
+				    &ctx, BPF_PROG_RUN);
 	if (!ret)
 		return -EPERM;
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c5e0b6a64091..b427eac10780 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -124,7 +124,7 @@  unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 	 * out of events when it was updated in between this and the
 	 * rcu_dereference() which is accepted risk.
 	 */
-	ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);
+	ret = BPF_PROG_RUN_ARRAY(call->prog_array, ctx, bpf_prog_run);
 
  out:
 	__this_cpu_dec(bpf_prog_active);