diff mbox series

[bpf-next,v4,05/15] bpf: XDP metadata RX kfuncs

Message ID 20221213023605.737383-6-sdf@google.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series xdp: hints via kfuncs | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for set-matrix
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4417 this patch: 4417
netdev/cc_maintainers warning 4 maintainers not CCed: edumazet@google.com davem@davemloft.net pabeni@redhat.com hawk@kernel.org
netdev/build_clang success Errors and warnings before: 1047 this patch: 1047
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4584 this patch: 4584
netdev/checkpatch fail ERROR: Macros with complex values should be enclosed in parentheses WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns
netdev/kdoc fail Errors and warnings before: 0 this patch: 1
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 fail Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-6 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix

Commit Message

Stanislav Fomichev Dec. 13, 2022, 2:35 a.m. UTC
Define a new kfunc set (xdp_metadata_kfunc_ids) which implements all possible
XDP metatada kfuncs. Not all devices have to implement them. If kfunc is not
supported by the target device, the default implementation is called instead.
The verifier, at load time, replaces a call to the generic kfunc with a call
to the per-device one. Per-device kfunc pointers are stored in separate
struct xdp_metadata_ops.

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
Cc: Maryam Tahhan <mtahhan@redhat.com>
Cc: xdp-hints@xdp-project.net
Cc: netdev@vger.kernel.org
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/bpf.h       |  2 ++
 include/linux/netdevice.h |  7 +++++++
 include/net/xdp.h         | 25 ++++++++++++++++++++++
 kernel/bpf/core.c         |  7 +++++++
 kernel/bpf/offload.c      | 23 ++++++++++++++++++++
 kernel/bpf/verifier.c     | 29 +++++++++++++++++++++++++-
 net/core/xdp.c            | 44 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 136 insertions(+), 1 deletion(-)

Comments

David Vernet Dec. 13, 2022, 5 p.m. UTC | #1
On Mon, Dec 12, 2022 at 06:35:55PM -0800, Stanislav Fomichev wrote:
> Define a new kfunc set (xdp_metadata_kfunc_ids) which implements all possible
> XDP metatada kfuncs. Not all devices have to implement them. If kfunc is not
> supported by the target device, the default implementation is called instead.
> The verifier, at load time, replaces a call to the generic kfunc with a call
> to the per-device one. Per-device kfunc pointers are stored in separate
> struct xdp_metadata_ops.
> 
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: David Ahern <dsahern@gmail.com>
> Cc: Martin KaFai Lau <martin.lau@linux.dev>
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: Willem de Bruijn <willemb@google.com>
> Cc: Jesper Dangaard Brouer <brouer@redhat.com>
> Cc: Anatoly Burakov <anatoly.burakov@intel.com>
> Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
> Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
> Cc: Maryam Tahhan <mtahhan@redhat.com>
> Cc: xdp-hints@xdp-project.net
> Cc: netdev@vger.kernel.org
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  include/linux/bpf.h       |  2 ++
>  include/linux/netdevice.h |  7 +++++++
>  include/net/xdp.h         | 25 ++++++++++++++++++++++
>  kernel/bpf/core.c         |  7 +++++++
>  kernel/bpf/offload.c      | 23 ++++++++++++++++++++
>  kernel/bpf/verifier.c     | 29 +++++++++++++++++++++++++-
>  net/core/xdp.c            | 44 +++++++++++++++++++++++++++++++++++++++
>  7 files changed, 136 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index ca22e8b8bd82..de6279725f41 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2477,6 +2477,8 @@ void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
>  				       struct net_device *netdev);
>  bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
>  
> +void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id);
> +
>  void unpriv_ebpf_notify(int new_state);
>  
>  #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 5aa35c58c342..63786091c60d 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -74,6 +74,7 @@ struct udp_tunnel_nic_info;
>  struct udp_tunnel_nic;
>  struct bpf_prog;
>  struct xdp_buff;
> +struct xdp_md;
>  
>  void synchronize_net(void);
>  void netdev_set_default_ethtool_ops(struct net_device *dev,
> @@ -1613,6 +1614,11 @@ struct net_device_ops {
>  						  bool cycles);
>  };
>  
> +struct xdp_metadata_ops {
> +	int	(*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
> +	int	(*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash);
> +};
> +
>  /**
>   * enum netdev_priv_flags - &struct net_device priv_flags
>   *
> @@ -2044,6 +2050,7 @@ struct net_device {
>  	unsigned int		flags;
>  	unsigned long long	priv_flags;
>  	const struct net_device_ops *netdev_ops;
> +	const struct xdp_metadata_ops *xdp_metadata_ops;
>  	int			ifindex;
>  	unsigned short		gflags;
>  	unsigned short		hard_header_len;
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 55dbc68bfffc..152c3a9c1127 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -409,4 +409,29 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
>  
>  #define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
>  
> +#define XDP_METADATA_KFUNC_xxx	\
> +	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \
> +			   bpf_xdp_metadata_rx_timestamp) \
> +	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
> +			   bpf_xdp_metadata_rx_hash) \
> +
> +enum {
> +#define XDP_METADATA_KFUNC(name, str) name,
> +XDP_METADATA_KFUNC_xxx
> +#undef XDP_METADATA_KFUNC
> +MAX_XDP_METADATA_KFUNC,
> +};
> +
> +struct xdp_md;
> +int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp);
> +int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash);

We don't usually export function signatures like this for kfuncs as
nobody in the main kernel should be linking against it. See [0].

[0]: https://docs.kernel.org/bpf/kfuncs.html#creating-a-wrapper-kfunc

> +
> +#ifdef CONFIG_NET
> +u32 xdp_metadata_kfunc_id(int id);
> +bool xdp_is_metadata_kfunc_id(u32 btf_id);
> +#else
> +static inline u32 xdp_metadata_kfunc_id(int id) { return 0; }
> +static inline bool xdp_is_metadata_kfunc_id(u32 btf_id) { return false; }
> +#endif
> +
>  #endif /* __LINUX_NET_XDP_H__ */
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index d434a994ee04..c3e501e3e39c 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2097,6 +2097,13 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
>  	if (fp->kprobe_override)
>  		return false;
>  
> +	/* When tail-calling from a non-dev-bound program to a dev-bound one,
> +	 * XDP metadata helpers should be disabled. Until it's implemented,
> +	 * prohibit adding dev-bound programs to tail-call maps.
> +	 */
> +	if (bpf_prog_is_dev_bound(fp->aux))
> +		return false;
> +
>  	spin_lock(&map->owner.lock);
>  	if (!map->owner.type) {
>  		/* There's no owner yet where we could check for
> diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> index f714c941f8ea..3b6c9023f24d 100644
> --- a/kernel/bpf/offload.c
> +++ b/kernel/bpf/offload.c
> @@ -757,6 +757,29 @@ void bpf_dev_bound_netdev_unregister(struct net_device *dev)
>  	up_write(&bpf_devs_lock);
>  }
>  
> +void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
> +{
> +	const struct xdp_metadata_ops *ops;
> +	void *p = NULL;
> +
> +	down_read(&bpf_devs_lock);
> +	if (!prog->aux->offload || !prog->aux->offload->netdev)
> +		goto out;
> +
> +	ops = prog->aux->offload->netdev->xdp_metadata_ops;
> +	if (!ops)
> +		goto out;
> +
> +	if (func_id == xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_TIMESTAMP))
> +		p = ops->xmo_rx_timestamp;
> +	else if (func_id == xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
> +		p = ops->xmo_rx_hash;
> +out:
> +	up_read(&bpf_devs_lock);
> +
> +	return p;
> +}
> +
>  static int __init bpf_offload_init(void)
>  {
>  	int err;
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 203d8cfeda70..e61fe0472b9b 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -15479,12 +15479,35 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  			    struct bpf_insn *insn_buf, int insn_idx, int *cnt)
>  {
>  	const struct bpf_kfunc_desc *desc;
> +	void *xdp_kfunc;
>  
>  	if (!insn->imm) {
>  		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
>  		return -EINVAL;
>  	}
>  
> +	*cnt = 0;
> +
> +	if (xdp_is_metadata_kfunc_id(insn->imm)) {
> +		if (!bpf_prog_is_dev_bound(env->prog->aux)) {
> +			verbose(env, "metadata kfuncs require device-bound program\n");
> +			return -EINVAL;
> +		}
> +
> +		if (bpf_prog_is_offloaded(env->prog->aux)) {
> +			verbose(env, "metadata kfuncs can't be offloaded\n");
> +			return -EINVAL;
> +		}
> +
> +		xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
> +		if (xdp_kfunc) {
> +			insn->imm = BPF_CALL_IMM(xdp_kfunc);
> +			return 0;
> +		}

Per another comment, should these xdp kfuncs use special_kfunc_list, or
some other variant that lives in verifier.c? I'll admit that I'm not
quite following why you wouldn't need to do the find_kfunc_desc() call
below, so apologies if I'm just totally off here.

> +
> +		/* fallback to default kfunc when not supported by netdev */
> +	}
> +
>  	/* insn->imm has the btf func_id. Replace it with
>  	 * an address (relative to __bpf_call_base).
>  	 */
> @@ -15495,7 +15518,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  		return -EFAULT;
>  	}
>  
> -	*cnt = 0;
>  	insn->imm = desc->imm;
>  	if (insn->off)
>  		return 0;
> @@ -16502,6 +16524,11 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
>  	if (tgt_prog) {
>  		struct bpf_prog_aux *aux = tgt_prog->aux;
>  
> +		if (bpf_prog_is_dev_bound(tgt_prog->aux)) {
> +			bpf_log(log, "Replacing device-bound programs not supported\n");
> +			return -EINVAL;
> +		}
> +
>  		for (i = 0; i < aux->func_info_cnt; i++)
>  			if (aux->func_info[i].type_id == btf_id) {
>  				subprog = i;
> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index 844c9d99dc0e..b0d4080249d7 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -4,6 +4,7 @@
>   * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
>   */
>  #include <linux/bpf.h>
> +#include <linux/btf_ids.h>
>  #include <linux/filter.h>
>  #include <linux/types.h>
>  #include <linux/mm.h>
> @@ -709,3 +710,46 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
>  
>  	return nxdpf;
>  }
> +
> +noinline int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +noinline int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
> +{
> +	return -EOPNOTSUPP;
> +}

I don't _think_ noinline should be necessary here given that the
function is global, though tbh I'm not sure if leaving it off will break
LTO. We currently don't use any attributes like this on other kfuncs
(e.g. [1]), but maybe we should?

[1]: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/kernel/bpf/helpers.c#n2034

> +
> +BTF_SET8_START(xdp_metadata_kfunc_ids)
> +#define XDP_METADATA_KFUNC(name, str) BTF_ID_FLAGS(func, str, 0)

IMO 'str' isn't the right parameter name here given that it's the actual
symbol and is not a string. What about _func or _symbol instead? Also
IMO 'name' is a bit misleading -- I'd go with something like '_enum'. I
wish there were a way for the preprocessor to auto-uppercase so you
could just define a single field that was used both for defining the
enum and for defining the symbol name.

> +XDP_METADATA_KFUNC_xxx
> +#undef XDP_METADATA_KFUNC
> +BTF_SET8_END(xdp_metadata_kfunc_ids)
> +
> +static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
> +	.owner = THIS_MODULE,
> +	.set   = &xdp_metadata_kfunc_ids,
> +};
> +
> +BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted)
> +#define XDP_METADATA_KFUNC(name, str) BTF_ID(func, str)
> +XDP_METADATA_KFUNC_xxx
> +#undef XDP_METADATA_KFUNC
> +
> +u32 xdp_metadata_kfunc_id(int id)
> +{
> +	/* xdp_metadata_kfunc_ids is sorted and can't be used */
> +	return xdp_metadata_kfunc_ids_unsorted[id];
> +}
> +
> +bool xdp_is_metadata_kfunc_id(u32 btf_id)
> +{
> +	return btf_id_set8_contains(&xdp_metadata_kfunc_ids, btf_id);
> +}

The verifier already has a notion of "special kfuncs" via a
special_kfunc_list that exists in verifier.c. Maybe we should be using
that given that is only used in the verifier anyways? OTOH, it's nice
that all of the complexity of e.g. accounting for #ifdef CONFIG_NET is
contained here, so I also like your approach. It just seems like a
divergence from how things are being done for other kfuncs so I figured
it was worth discussing.

> +
> +static int __init xdp_metadata_init(void)
> +{
> +	return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &xdp_metadata_kfunc_set);
> +}
> +late_initcall(xdp_metadata_init);
> -- 
> 2.39.0.rc1.256.g54fd8350bd-goog
>
Stanislav Fomichev Dec. 13, 2022, 8:42 p.m. UTC | #2
On Tue, Dec 13, 2022 at 9:00 AM David Vernet <void@manifault.com> wrote:
>
> On Mon, Dec 12, 2022 at 06:35:55PM -0800, Stanislav Fomichev wrote:
> > Define a new kfunc set (xdp_metadata_kfunc_ids) which implements all possible
> > XDP metatada kfuncs. Not all devices have to implement them. If kfunc is not
> > supported by the target device, the default implementation is called instead.
> > The verifier, at load time, replaces a call to the generic kfunc with a call
> > to the per-device one. Per-device kfunc pointers are stored in separate
> > struct xdp_metadata_ops.
> >
> > Cc: John Fastabend <john.fastabend@gmail.com>
> > Cc: David Ahern <dsahern@gmail.com>
> > Cc: Martin KaFai Lau <martin.lau@linux.dev>
> > Cc: Jakub Kicinski <kuba@kernel.org>
> > Cc: Willem de Bruijn <willemb@google.com>
> > Cc: Jesper Dangaard Brouer <brouer@redhat.com>
> > Cc: Anatoly Burakov <anatoly.burakov@intel.com>
> > Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
> > Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
> > Cc: Maryam Tahhan <mtahhan@redhat.com>
> > Cc: xdp-hints@xdp-project.net
> > Cc: netdev@vger.kernel.org
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >  include/linux/bpf.h       |  2 ++
> >  include/linux/netdevice.h |  7 +++++++
> >  include/net/xdp.h         | 25 ++++++++++++++++++++++
> >  kernel/bpf/core.c         |  7 +++++++
> >  kernel/bpf/offload.c      | 23 ++++++++++++++++++++
> >  kernel/bpf/verifier.c     | 29 +++++++++++++++++++++++++-
> >  net/core/xdp.c            | 44 +++++++++++++++++++++++++++++++++++++++
> >  7 files changed, 136 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index ca22e8b8bd82..de6279725f41 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -2477,6 +2477,8 @@ void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
> >                                      struct net_device *netdev);
> >  bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
> >
> > +void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id);
> > +
> >  void unpriv_ebpf_notify(int new_state);
> >
> >  #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index 5aa35c58c342..63786091c60d 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -74,6 +74,7 @@ struct udp_tunnel_nic_info;
> >  struct udp_tunnel_nic;
> >  struct bpf_prog;
> >  struct xdp_buff;
> > +struct xdp_md;
> >
> >  void synchronize_net(void);
> >  void netdev_set_default_ethtool_ops(struct net_device *dev,
> > @@ -1613,6 +1614,11 @@ struct net_device_ops {
> >                                                 bool cycles);
> >  };
> >
> > +struct xdp_metadata_ops {
> > +     int     (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
> > +     int     (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash);
> > +};
> > +
> >  /**
> >   * enum netdev_priv_flags - &struct net_device priv_flags
> >   *
> > @@ -2044,6 +2050,7 @@ struct net_device {
> >       unsigned int            flags;
> >       unsigned long long      priv_flags;
> >       const struct net_device_ops *netdev_ops;
> > +     const struct xdp_metadata_ops *xdp_metadata_ops;
> >       int                     ifindex;
> >       unsigned short          gflags;
> >       unsigned short          hard_header_len;
> > diff --git a/include/net/xdp.h b/include/net/xdp.h
> > index 55dbc68bfffc..152c3a9c1127 100644
> > --- a/include/net/xdp.h
> > +++ b/include/net/xdp.h
> > @@ -409,4 +409,29 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
> >
> >  #define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
> >
> > +#define XDP_METADATA_KFUNC_xxx       \
> > +     XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \
> > +                        bpf_xdp_metadata_rx_timestamp) \
> > +     XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
> > +                        bpf_xdp_metadata_rx_hash) \
> > +
> > +enum {
> > +#define XDP_METADATA_KFUNC(name, str) name,
> > +XDP_METADATA_KFUNC_xxx
> > +#undef XDP_METADATA_KFUNC
> > +MAX_XDP_METADATA_KFUNC,
> > +};
> > +
> > +struct xdp_md;
> > +int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp);
> > +int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash);
>
> We don't usually export function signatures like this for kfuncs as
> nobody in the main kernel should be linking against it. See [0].
>
> [0]: https://docs.kernel.org/bpf/kfuncs.html#creating-a-wrapper-kfunc

Oh, thanks, that's very helpful. As you might have guessed, I've added
those signatures to make the compiler happy :-(

> > +
> > +#ifdef CONFIG_NET
> > +u32 xdp_metadata_kfunc_id(int id);
> > +bool xdp_is_metadata_kfunc_id(u32 btf_id);
> > +#else
> > +static inline u32 xdp_metadata_kfunc_id(int id) { return 0; }
> > +static inline bool xdp_is_metadata_kfunc_id(u32 btf_id) { return false; }
> > +#endif
> > +
> >  #endif /* __LINUX_NET_XDP_H__ */
> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index d434a994ee04..c3e501e3e39c 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -2097,6 +2097,13 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
> >       if (fp->kprobe_override)
> >               return false;
> >
> > +     /* When tail-calling from a non-dev-bound program to a dev-bound one,
> > +      * XDP metadata helpers should be disabled. Until it's implemented,
> > +      * prohibit adding dev-bound programs to tail-call maps.
> > +      */
> > +     if (bpf_prog_is_dev_bound(fp->aux))
> > +             return false;
> > +
> >       spin_lock(&map->owner.lock);
> >       if (!map->owner.type) {
> >               /* There's no owner yet where we could check for
> > diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> > index f714c941f8ea..3b6c9023f24d 100644
> > --- a/kernel/bpf/offload.c
> > +++ b/kernel/bpf/offload.c
> > @@ -757,6 +757,29 @@ void bpf_dev_bound_netdev_unregister(struct net_device *dev)
> >       up_write(&bpf_devs_lock);
> >  }
> >
> > +void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
> > +{
> > +     const struct xdp_metadata_ops *ops;
> > +     void *p = NULL;
> > +
> > +     down_read(&bpf_devs_lock);
> > +     if (!prog->aux->offload || !prog->aux->offload->netdev)
> > +             goto out;
> > +
> > +     ops = prog->aux->offload->netdev->xdp_metadata_ops;
> > +     if (!ops)
> > +             goto out;
> > +
> > +     if (func_id == xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_TIMESTAMP))
> > +             p = ops->xmo_rx_timestamp;
> > +     else if (func_id == xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
> > +             p = ops->xmo_rx_hash;
> > +out:
> > +     up_read(&bpf_devs_lock);
> > +
> > +     return p;
> > +}
> > +
> >  static int __init bpf_offload_init(void)
> >  {
> >       int err;
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index 203d8cfeda70..e61fe0472b9b 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -15479,12 +15479,35 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >                           struct bpf_insn *insn_buf, int insn_idx, int *cnt)
> >  {
> >       const struct bpf_kfunc_desc *desc;
> > +     void *xdp_kfunc;
> >
> >       if (!insn->imm) {
> >               verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
> >               return -EINVAL;
> >       }
> >
> > +     *cnt = 0;
> > +
> > +     if (xdp_is_metadata_kfunc_id(insn->imm)) {
> > +             if (!bpf_prog_is_dev_bound(env->prog->aux)) {
> > +                     verbose(env, "metadata kfuncs require device-bound program\n");
> > +                     return -EINVAL;
> > +             }
> > +
> > +             if (bpf_prog_is_offloaded(env->prog->aux)) {
> > +                     verbose(env, "metadata kfuncs can't be offloaded\n");
> > +                     return -EINVAL;
> > +             }
> > +
> > +             xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
> > +             if (xdp_kfunc) {
> > +                     insn->imm = BPF_CALL_IMM(xdp_kfunc);
> > +                     return 0;
> > +             }
>
> Per another comment, should these xdp kfuncs use special_kfunc_list, or
> some other variant that lives in verifier.c? I'll admit that I'm not
> quite following why you wouldn't need to do the find_kfunc_desc() call
> below, so apologies if I'm just totally off here.

Here I'm trying to short-circuit that generic verifier handling and do
kfunc resolving myself, so not sure. Will comment about
special_kfunc_list below.

> > +
> > +             /* fallback to default kfunc when not supported by netdev */
> > +     }
> > +
> >       /* insn->imm has the btf func_id. Replace it with
> >        * an address (relative to __bpf_call_base).
> >        */
> > @@ -15495,7 +15518,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >               return -EFAULT;
> >       }
> >
> > -     *cnt = 0;
> >       insn->imm = desc->imm;
> >       if (insn->off)
> >               return 0;
> > @@ -16502,6 +16524,11 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
> >       if (tgt_prog) {
> >               struct bpf_prog_aux *aux = tgt_prog->aux;
> >
> > +             if (bpf_prog_is_dev_bound(tgt_prog->aux)) {
> > +                     bpf_log(log, "Replacing device-bound programs not supported\n");
> > +                     return -EINVAL;
> > +             }
> > +
> >               for (i = 0; i < aux->func_info_cnt; i++)
> >                       if (aux->func_info[i].type_id == btf_id) {
> >                               subprog = i;
> > diff --git a/net/core/xdp.c b/net/core/xdp.c
> > index 844c9d99dc0e..b0d4080249d7 100644
> > --- a/net/core/xdp.c
> > +++ b/net/core/xdp.c
> > @@ -4,6 +4,7 @@
> >   * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
> >   */
> >  #include <linux/bpf.h>
> > +#include <linux/btf_ids.h>
> >  #include <linux/filter.h>
> >  #include <linux/types.h>
> >  #include <linux/mm.h>
> > @@ -709,3 +710,46 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
> >
> >       return nxdpf;
> >  }
> > +
> > +noinline int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
> > +{
> > +     return -EOPNOTSUPP;
> > +}
> > +
> > +noinline int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
> > +{
> > +     return -EOPNOTSUPP;
> > +}
>
> I don't _think_ noinline should be necessary here given that the
> function is global, though tbh I'm not sure if leaving it off will break
> LTO. We currently don't use any attributes like this on other kfuncs
> (e.g. [1]), but maybe we should?
>
> [1]: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/kernel/bpf/helpers.c#n2034

Hm, I guess since I'm not really directly calling these anywhere,
there is no chance they are going to be inlined? Will try to drop and
see what happens..

> > +
> > +BTF_SET8_START(xdp_metadata_kfunc_ids)
> > +#define XDP_METADATA_KFUNC(name, str) BTF_ID_FLAGS(func, str, 0)
>
> IMO 'str' isn't the right parameter name here given that it's the actual
> symbol and is not a string. What about _func or _symbol instead? Also
> IMO 'name' is a bit misleading -- I'd go with something like '_enum'. I
> wish there were a way for the preprocessor to auto-uppercase so you
> could just define a single field that was used both for defining the
> enum and for defining the symbol name.

How about I do the following:

enum {
#define XDP_METADATA_KFUNC(name, _) name,
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
MAX_XDP_METADATA_KFUNC,
};

And then this in the .c file:

BTF_SET8_START(xdp_metadata_kfunc_ids)
#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, 0)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
BTF_SET8_END(xdp_metadata_kfunc_ids)

Should be a bit more clear what and where I use? Otherwise, using
_func might seem a bit confusing in:
#define XDP_METADATA_KFUNC(_enum, _func) BTF_ID_FLAGS(func, _func, 0)

The "func, _func" part. Or maybe that's fine.. WDYT?

> > +XDP_METADATA_KFUNC_xxx
> > +#undef XDP_METADATA_KFUNC
> > +BTF_SET8_END(xdp_metadata_kfunc_ids)
> > +
> > +static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
> > +     .owner = THIS_MODULE,
> > +     .set   = &xdp_metadata_kfunc_ids,
> > +};
> > +
> > +BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted)
> > +#define XDP_METADATA_KFUNC(name, str) BTF_ID(func, str)
> > +XDP_METADATA_KFUNC_xxx
> > +#undef XDP_METADATA_KFUNC
> > +
> > +u32 xdp_metadata_kfunc_id(int id)
> > +{
> > +     /* xdp_metadata_kfunc_ids is sorted and can't be used */
> > +     return xdp_metadata_kfunc_ids_unsorted[id];
> > +}
> > +
> > +bool xdp_is_metadata_kfunc_id(u32 btf_id)
> > +{
> > +     return btf_id_set8_contains(&xdp_metadata_kfunc_ids, btf_id);
> > +}
>
> The verifier already has a notion of "special kfuncs" via a
> special_kfunc_list that exists in verifier.c. Maybe we should be using
> that given that is only used in the verifier anyways? OTOH, it's nice
> that all of the complexity of e.g. accounting for #ifdef CONFIG_NET is
> contained here, so I also like your approach. It just seems like a
> divergence from how things are being done for other kfuncs so I figured
> it was worth discussing.

Yeah, idk, I've tried not to add more to the already huge verifier.c file :-(
If we were to put everything into verifier.c, I'd still need some
extra special_xdp_kfunc_list for those xdp kfuncs to be able to
distinguish them from the rest...
So yeah, not sure, I'd prefer to keep everything in xdp.c and not
pollute the more generic verifier.c, but I'm fine either way. LMK if
you feel strongly about it, can move.


> > +
> > +static int __init xdp_metadata_init(void)
> > +{
> > +     return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &xdp_metadata_kfunc_set);
> > +}
> > +late_initcall(xdp_metadata_init);
> > --
> > 2.39.0.rc1.256.g54fd8350bd-goog
> >
David Vernet Dec. 13, 2022, 9:45 p.m. UTC | #3
On Tue, Dec 13, 2022 at 12:42:30PM -0800, Stanislav Fomichev wrote:

[...]

> > We don't usually export function signatures like this for kfuncs as
> > nobody in the main kernel should be linking against it. See [0].
> >
> > [0]: https://docs.kernel.org/bpf/kfuncs.html#creating-a-wrapper-kfunc
> 
> Oh, thanks, that's very helpful. As you might have guessed, I've added
> those signatures to make the compiler happy :-(

No problem, and yeah, it's a pain :-( It would be really nice if we
could do something like this:

#define __kfunc __attribute__((nowarn("Wmissing-protoypes")))

But that attribute doesn't exist.

> > > +     if (xdp_is_metadata_kfunc_id(insn->imm)) {
> > > +             if (!bpf_prog_is_dev_bound(env->prog->aux)) {
> > > +                     verbose(env, "metadata kfuncs require device-bound program\n");
> > > +                     return -EINVAL;
> > > +             }
> > > +
> > > +             if (bpf_prog_is_offloaded(env->prog->aux)) {
> > > +                     verbose(env, "metadata kfuncs can't be offloaded\n");
> > > +                     return -EINVAL;
> > > +             }
> > > +
> > > +             xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
> > > +             if (xdp_kfunc) {
> > > +                     insn->imm = BPF_CALL_IMM(xdp_kfunc);
> > > +                     return 0;
> > > +             }
> >
> > Per another comment, should these xdp kfuncs use special_kfunc_list, or
> > some other variant that lives in verifier.c? I'll admit that I'm not
> > quite following why you wouldn't need to do the find_kfunc_desc() call
> > below, so apologies if I'm just totally off here.
> 
> Here I'm trying to short-circuit that generic verifier handling and do
> kfunc resolving myself, so not sure. Will comment about
> special_kfunc_list below.

Understood -- if it's totally separate then do what you need to do. My
only "objection" is that it's a bit sad when we have divergent /
special-case handling in the verifier between all these different kfunc
/ helpers / etc, but I think it's inevitable until we do a larger
refactoring.  It's contained to fixup_kfunc_call() at least, so IMO it's
fine.

[...]

> 
> > > +
> > > +             /* fallback to default kfunc when not supported by netdev */
> > > +     }
> > > +
> > >       /* insn->imm has the btf func_id. Replace it with
> > >        * an address (relative to __bpf_call_base).
> > >        */
> > > @@ -15495,7 +15518,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> > >               return -EFAULT;
> > >       }
> > >
> > > -     *cnt = 0;
> > >       insn->imm = desc->imm;
> > >       if (insn->off)
> > >               return 0;
> > > @@ -16502,6 +16524,11 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
> > >       if (tgt_prog) {
> > >               struct bpf_prog_aux *aux = tgt_prog->aux;
> > >
> > > +             if (bpf_prog_is_dev_bound(tgt_prog->aux)) {
> > > +                     bpf_log(log, "Replacing device-bound programs not supported\n");
> > > +                     return -EINVAL;
> > > +             }
> > > +
> > >               for (i = 0; i < aux->func_info_cnt; i++)
> > >                       if (aux->func_info[i].type_id == btf_id) {
> > >                               subprog = i;
> > > diff --git a/net/core/xdp.c b/net/core/xdp.c
> > > index 844c9d99dc0e..b0d4080249d7 100644
> > > --- a/net/core/xdp.c
> > > +++ b/net/core/xdp.c
> > > @@ -4,6 +4,7 @@
> > >   * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
> > >   */
> > >  #include <linux/bpf.h>
> > > +#include <linux/btf_ids.h>
> > >  #include <linux/filter.h>
> > >  #include <linux/types.h>
> > >  #include <linux/mm.h>
> > > @@ -709,3 +710,46 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
> > >
> > >       return nxdpf;
> > >  }
> > > +
> > > +noinline int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
> > > +{
> > > +     return -EOPNOTSUPP;
> > > +}
> > > +
> > > +noinline int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
> > > +{
> > > +     return -EOPNOTSUPP;
> > > +}
> >
> > I don't _think_ noinline should be necessary here given that the
> > function is global, though tbh I'm not sure if leaving it off will break
> > LTO. We currently don't use any attributes like this on other kfuncs
> > (e.g. [1]), but maybe we should?
> >
> > [1]: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/kernel/bpf/helpers.c#n2034
> 
> Hm, I guess since I'm not really directly calling these anywhere,
> there is no chance they are going to be inlined? Will try to drop and
> see what happens..

Yeah, if it's a global symbol I think you should be OK. Again, we need
to figure out the story for LTO though. Later on I think we should add a
__kfunc macro which handles this invisibly for all kfunc definitions.

> > > +
> > > +BTF_SET8_START(xdp_metadata_kfunc_ids)
> > > +#define XDP_METADATA_KFUNC(name, str) BTF_ID_FLAGS(func, str, 0)
> >
> > IMO 'str' isn't the right parameter name here given that it's the actual
> > symbol and is not a string. What about _func or _symbol instead? Also
> > IMO 'name' is a bit misleading -- I'd go with something like '_enum'. I
> > wish there were a way for the preprocessor to auto-uppercase so you
> > could just define a single field that was used both for defining the
> > enum and for defining the symbol name.
> 
> How about I do the following:
> 
> enum {
> #define XDP_METADATA_KFUNC(name, _) name,
> XDP_METADATA_KFUNC_xxx
> #undef XDP_METADATA_KFUNC
> MAX_XDP_METADATA_KFUNC,
> };

Looks good!

> 
> And then this in the .c file:
> 
> BTF_SET8_START(xdp_metadata_kfunc_ids)
> #define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, 0)
> XDP_METADATA_KFUNC_xxx
> #undef XDP_METADATA_KFUNC
> BTF_SET8_END(xdp_metadata_kfunc_ids)

Here as well.

> 
> Should be a bit more clear what and where I use? Otherwise, using
> _func might seem a bit confusing in:
> #define XDP_METADATA_KFUNC(_enum, _func) BTF_ID_FLAGS(func, _func, 0)
> 
> The "func, _func" part. Or maybe that's fine.. WDYT?

LGTM, thanks!

> > > +XDP_METADATA_KFUNC_xxx
> > > +#undef XDP_METADATA_KFUNC
> > > +BTF_SET8_END(xdp_metadata_kfunc_ids)
> > > +
> > > +static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
> > > +     .owner = THIS_MODULE,
> > > +     .set   = &xdp_metadata_kfunc_ids,
> > > +};
> > > +
> > > +BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted)
> > > +#define XDP_METADATA_KFUNC(name, str) BTF_ID(func, str)
> > > +XDP_METADATA_KFUNC_xxx
> > > +#undef XDP_METADATA_KFUNC
> > > +
> > > +u32 xdp_metadata_kfunc_id(int id)
> > > +{
> > > +     /* xdp_metadata_kfunc_ids is sorted and can't be used */
> > > +     return xdp_metadata_kfunc_ids_unsorted[id];
> > > +}
> > > +
> > > +bool xdp_is_metadata_kfunc_id(u32 btf_id)
> > > +{
> > > +     return btf_id_set8_contains(&xdp_metadata_kfunc_ids, btf_id);
> > > +}
> >
> > The verifier already has a notion of "special kfuncs" via a
> > special_kfunc_list that exists in verifier.c. Maybe we should be using
> > that given that is only used in the verifier anyways? OTOH, it's nice
> > that all of the complexity of e.g. accounting for #ifdef CONFIG_NET is
> > contained here, so I also like your approach. It just seems like a
> > divergence from how things are being done for other kfuncs so I figured
> > it was worth discussing.
> 
> Yeah, idk, I've tried not to add more to the already huge verifier.c file :-(
> If we were to put everything into verifier.c, I'd still need some
> extra special_xdp_kfunc_list for those xdp kfuncs to be able to
> distinguish them from the rest...
> So yeah, not sure, I'd prefer to keep everything in xdp.c and not
> pollute the more generic verifier.c, but I'm fine either way. LMK if
> you feel strongly about it, can move.

IMO not polluting the already enormous verifier.c is definitely the
right thing to do -- especially for kfuncs like this which are going to
be defined throughout the kernel. So yeah, you can keep what you have.
And maybe at some point we should pull more logic out of verifier.c and
into the locations where the kfuncs are implemented as you're doing
here.
Martin KaFai Lau Dec. 14, 2022, 1:53 a.m. UTC | #4
On 12/12/22 6:35 PM, Stanislav Fomichev wrote:
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index ca22e8b8bd82..de6279725f41 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2477,6 +2477,8 @@ void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
>   				       struct net_device *netdev);
>   bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
>   
> +void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id);
> +

This probably requires an inline version for !CONFIG_NET.

> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index d434a994ee04..c3e501e3e39c 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2097,6 +2097,13 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
>   	if (fp->kprobe_override)
>   		return false;
>   
> +	/* When tail-calling from a non-dev-bound program to a dev-bound one,
> +	 * XDP metadata helpers should be disabled. Until it's implemented,
> +	 * prohibit adding dev-bound programs to tail-call maps.
> +	 */
> +	if (bpf_prog_is_dev_bound(fp->aux))
> +		return false;
> +
>   	spin_lock(&map->owner.lock);
>   	if (!map->owner.type) {
>   		/* There's no owner yet where we could check for
> diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> index f714c941f8ea..3b6c9023f24d 100644
> --- a/kernel/bpf/offload.c
> +++ b/kernel/bpf/offload.c
> @@ -757,6 +757,29 @@ void bpf_dev_bound_netdev_unregister(struct net_device *dev)
>   	up_write(&bpf_devs_lock);
>   }
>   
> +void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
> +{
> +	const struct xdp_metadata_ops *ops;
> +	void *p = NULL;
> +
> +	down_read(&bpf_devs_lock);
> +	if (!prog->aux->offload || !prog->aux->offload->netdev)

This happens when netdev is unregistered in the middle of bpf_prog_load and the 
bpf_offload_dev_match() will eventually fail during dev_xdp_attach()? A comment 
will be useful.

> +		goto out;
> +
> +	ops = prog->aux->offload->netdev->xdp_metadata_ops;
> +	if (!ops)
> +		goto out;
> +
> +	if (func_id == xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_TIMESTAMP))
> +		p = ops->xmo_rx_timestamp;
> +	else if (func_id == xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
> +		p = ops->xmo_rx_hash;
> +out:
> +	up_read(&bpf_devs_lock);
> +
> +	return p;
> +}
> +
>   static int __init bpf_offload_init(void)
>   {
>   	int err;
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 203d8cfeda70..e61fe0472b9b 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -15479,12 +15479,35 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>   			    struct bpf_insn *insn_buf, int insn_idx, int *cnt)
>   {
>   	const struct bpf_kfunc_desc *desc;
> +	void *xdp_kfunc;
>   
>   	if (!insn->imm) {
>   		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
>   		return -EINVAL;
>   	}
>   
> +	*cnt = 0;
> +
> +	if (xdp_is_metadata_kfunc_id(insn->imm)) {
> +		if (!bpf_prog_is_dev_bound(env->prog->aux)) {

The "xdp_is_metadata_kfunc_id() && (!bpf_prog_is_dev_bound() || 
bpf_prog_is_offloaded())" test should have been done much earlier in 
add_kfunc_call(). Then the later stage of the verifier does not have to keep 
worrying about it like here.

nit. may be rename xdp_is_metadata_kfunc_id() to bpf_dev_bound_kfunc_id() and 
hide the "!bpf_prog_is_dev_bound() || bpf_prog_is_offloaded()" test into 
bpf_dev_bound_kfunc_check(&env->log, env->prog).

The change in fixup_kfunc_call could then become:

	if (bpf_dev_bound_kfunc_id(insn->imm)) {
		xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
		/* ... */
	}

> +			verbose(env, "metadata kfuncs require device-bound program\n");
> +			return -EINVAL;
> +		}
> +
> +		if (bpf_prog_is_offloaded(env->prog->aux)) {
> +			verbose(env, "metadata kfuncs can't be offloaded\n");
> +			return -EINVAL;
> +		}
> +
> +		xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
> +		if (xdp_kfunc) {
> +			insn->imm = BPF_CALL_IMM(xdp_kfunc);
> +			return 0;
> +		}
> +
> +		/* fallback to default kfunc when not supported by netdev */
> +	}
> +
Toke Høiland-Jørgensen Dec. 14, 2022, 10:54 a.m. UTC | #5
Stanislav Fomichev <sdf@google.com> writes:

[..]

> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index d434a994ee04..c3e501e3e39c 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2097,6 +2097,13 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
>  	if (fp->kprobe_override)
>  		return false;
>  
> +	/* When tail-calling from a non-dev-bound program to a dev-bound one,
> +	 * XDP metadata helpers should be disabled. Until it's implemented,
> +	 * prohibit adding dev-bound programs to tail-call maps.
> +	 */
> +	if (bpf_prog_is_dev_bound(fp->aux))
> +		return false;
> +

nit: the comment is slightly inaccurate as the program running in a
devmap/cpumap has nothing to do with tail calls. maybe replace it with:

"XDP programs inserted into maps are not guaranteed to run on a
particular netdev (and can run outside driver context entirely in the
case of devmap and cpumap). Until device checks are implemented,
prohibit adding dev-bound programs to program maps."

Also, there needs to be a check in bpf_prog_test_run_xdp() to reject
dev-bound programs there as well...

-Toke
Stanislav Fomichev Dec. 14, 2022, 6:43 p.m. UTC | #6
On Wed, Dec 14, 2022 at 2:54 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
> Stanislav Fomichev <sdf@google.com> writes:
>
> [..]
>
> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index d434a994ee04..c3e501e3e39c 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -2097,6 +2097,13 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
> >       if (fp->kprobe_override)
> >               return false;
> >
> > +     /* When tail-calling from a non-dev-bound program to a dev-bound one,
> > +      * XDP metadata helpers should be disabled. Until it's implemented,
> > +      * prohibit adding dev-bound programs to tail-call maps.
> > +      */
> > +     if (bpf_prog_is_dev_bound(fp->aux))
> > +             return false;
> > +
>
> nit: the comment is slightly inaccurate as the program running in a
> devmap/cpumap has nothing to do with tail calls. maybe replace it with:
>
> "XDP programs inserted into maps are not guaranteed to run on a
> particular netdev (and can run outside driver context entirely in the
> case of devmap and cpumap). Until device checks are implemented,
> prohibit adding dev-bound programs to program maps."

SG.

> Also, there needs to be a check in bpf_prog_test_run_xdp() to reject
> dev-bound programs there as well...

Ah, totally forgot about this part, thanks for reminding!

> -Toke
>
Stanislav Fomichev Dec. 14, 2022, 6:43 p.m. UTC | #7
On Tue, Dec 13, 2022 at 5:54 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 12/12/22 6:35 PM, Stanislav Fomichev wrote:
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index ca22e8b8bd82..de6279725f41 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -2477,6 +2477,8 @@ void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
> >                                      struct net_device *netdev);
> >   bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
> >
> > +void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id);
> > +
>
> This probably requires an inline version for !CONFIG_NET.

Yeah, not sure why my confings didn't catch this :-(

> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index d434a994ee04..c3e501e3e39c 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -2097,6 +2097,13 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
> >       if (fp->kprobe_override)
> >               return false;
> >
> > +     /* When tail-calling from a non-dev-bound program to a dev-bound one,
> > +      * XDP metadata helpers should be disabled. Until it's implemented,
> > +      * prohibit adding dev-bound programs to tail-call maps.
> > +      */
> > +     if (bpf_prog_is_dev_bound(fp->aux))
> > +             return false;
> > +
> >       spin_lock(&map->owner.lock);
> >       if (!map->owner.type) {
> >               /* There's no owner yet where we could check for
> > diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> > index f714c941f8ea..3b6c9023f24d 100644
> > --- a/kernel/bpf/offload.c
> > +++ b/kernel/bpf/offload.c
> > @@ -757,6 +757,29 @@ void bpf_dev_bound_netdev_unregister(struct net_device *dev)
> >       up_write(&bpf_devs_lock);
> >   }
> >
> > +void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
> > +{
> > +     const struct xdp_metadata_ops *ops;
> > +     void *p = NULL;
> > +
> > +     down_read(&bpf_devs_lock);
> > +     if (!prog->aux->offload || !prog->aux->offload->netdev)
>
> This happens when netdev is unregistered in the middle of bpf_prog_load and the
> bpf_offload_dev_match() will eventually fail during dev_xdp_attach()? A comment
> will be useful.

Right, that's the expectation - we load/verify the prog but it's
essentially un-attach-able.
Will try to clarify here.

> > +             goto out;
> > +
> > +     ops = prog->aux->offload->netdev->xdp_metadata_ops;
> > +     if (!ops)
> > +             goto out;
> > +
> > +     if (func_id == xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_TIMESTAMP))
> > +             p = ops->xmo_rx_timestamp;
> > +     else if (func_id == xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
> > +             p = ops->xmo_rx_hash;
> > +out:
> > +     up_read(&bpf_devs_lock);
> > +
> > +     return p;
> > +}
> > +
> >   static int __init bpf_offload_init(void)
> >   {
> >       int err;
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index 203d8cfeda70..e61fe0472b9b 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -15479,12 +15479,35 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >                           struct bpf_insn *insn_buf, int insn_idx, int *cnt)
> >   {
> >       const struct bpf_kfunc_desc *desc;
> > +     void *xdp_kfunc;
> >
> >       if (!insn->imm) {
> >               verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
> >               return -EINVAL;
> >       }
> >
> > +     *cnt = 0;
> > +
> > +     if (xdp_is_metadata_kfunc_id(insn->imm)) {
> > +             if (!bpf_prog_is_dev_bound(env->prog->aux)) {
>
> The "xdp_is_metadata_kfunc_id() && (!bpf_prog_is_dev_bound() ||
> bpf_prog_is_offloaded())" test should have been done much earlier in
> add_kfunc_call(). Then the later stage of the verifier does not have to keep
> worrying about it like here.
>
> nit. may be rename xdp_is_metadata_kfunc_id() to bpf_dev_bound_kfunc_id() and
> hide the "!bpf_prog_is_dev_bound() || bpf_prog_is_offloaded()" test into
> bpf_dev_bound_kfunc_check(&env->log, env->prog).
>
> The change in fixup_kfunc_call could then become:
>
>         if (bpf_dev_bound_kfunc_id(insn->imm)) {
>                 xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
>                 /* ... */
>         }

Makes sense, ty!


> > +                     verbose(env, "metadata kfuncs require device-bound program\n");
> > +                     return -EINVAL;
> > +             }
> > +
> > +             if (bpf_prog_is_offloaded(env->prog->aux)) {
> > +                     verbose(env, "metadata kfuncs can't be offloaded\n");
> > +                     return -EINVAL;
> > +             }
> > +
> > +             xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
> > +             if (xdp_kfunc) {
> > +                     insn->imm = BPF_CALL_IMM(xdp_kfunc);
> > +                     return 0;
> > +             }
> > +
> > +             /* fallback to default kfunc when not supported by netdev */
> > +     }
> > +
>
>
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ca22e8b8bd82..de6279725f41 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2477,6 +2477,8 @@  void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
 				       struct net_device *netdev);
 bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
 
+void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id);
+
 void unpriv_ebpf_notify(int new_state);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5aa35c58c342..63786091c60d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -74,6 +74,7 @@  struct udp_tunnel_nic_info;
 struct udp_tunnel_nic;
 struct bpf_prog;
 struct xdp_buff;
+struct xdp_md;
 
 void synchronize_net(void);
 void netdev_set_default_ethtool_ops(struct net_device *dev,
@@ -1613,6 +1614,11 @@  struct net_device_ops {
 						  bool cycles);
 };
 
+struct xdp_metadata_ops {
+	int	(*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
+	int	(*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash);
+};
+
 /**
  * enum netdev_priv_flags - &struct net_device priv_flags
  *
@@ -2044,6 +2050,7 @@  struct net_device {
 	unsigned int		flags;
 	unsigned long long	priv_flags;
 	const struct net_device_ops *netdev_ops;
+	const struct xdp_metadata_ops *xdp_metadata_ops;
 	int			ifindex;
 	unsigned short		gflags;
 	unsigned short		hard_header_len;
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 55dbc68bfffc..152c3a9c1127 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -409,4 +409,29 @@  void xdp_attachment_setup(struct xdp_attachment_info *info,
 
 #define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
 
+#define XDP_METADATA_KFUNC_xxx	\
+	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \
+			   bpf_xdp_metadata_rx_timestamp) \
+	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
+			   bpf_xdp_metadata_rx_hash) \
+
+enum {
+#define XDP_METADATA_KFUNC(name, str) name,
+XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+MAX_XDP_METADATA_KFUNC,
+};
+
+struct xdp_md;
+int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp);
+int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash);
+
+#ifdef CONFIG_NET
+u32 xdp_metadata_kfunc_id(int id);
+bool xdp_is_metadata_kfunc_id(u32 btf_id);
+#else
+static inline u32 xdp_metadata_kfunc_id(int id) { return 0; }
+static inline bool xdp_is_metadata_kfunc_id(u32 btf_id) { return false; }
+#endif
+
 #endif /* __LINUX_NET_XDP_H__ */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index d434a994ee04..c3e501e3e39c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2097,6 +2097,13 @@  bool bpf_prog_map_compatible(struct bpf_map *map,
 	if (fp->kprobe_override)
 		return false;
 
+	/* When tail-calling from a non-dev-bound program to a dev-bound one,
+	 * XDP metadata helpers should be disabled. Until it's implemented,
+	 * prohibit adding dev-bound programs to tail-call maps.
+	 */
+	if (bpf_prog_is_dev_bound(fp->aux))
+		return false;
+
 	spin_lock(&map->owner.lock);
 	if (!map->owner.type) {
 		/* There's no owner yet where we could check for
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index f714c941f8ea..3b6c9023f24d 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -757,6 +757,29 @@  void bpf_dev_bound_netdev_unregister(struct net_device *dev)
 	up_write(&bpf_devs_lock);
 }
 
+void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
+{
+	const struct xdp_metadata_ops *ops;
+	void *p = NULL;
+
+	down_read(&bpf_devs_lock);
+	if (!prog->aux->offload || !prog->aux->offload->netdev)
+		goto out;
+
+	ops = prog->aux->offload->netdev->xdp_metadata_ops;
+	if (!ops)
+		goto out;
+
+	if (func_id == xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_TIMESTAMP))
+		p = ops->xmo_rx_timestamp;
+	else if (func_id == xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
+		p = ops->xmo_rx_hash;
+out:
+	up_read(&bpf_devs_lock);
+
+	return p;
+}
+
 static int __init bpf_offload_init(void)
 {
 	int err;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 203d8cfeda70..e61fe0472b9b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -15479,12 +15479,35 @@  static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			    struct bpf_insn *insn_buf, int insn_idx, int *cnt)
 {
 	const struct bpf_kfunc_desc *desc;
+	void *xdp_kfunc;
 
 	if (!insn->imm) {
 		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
 		return -EINVAL;
 	}
 
+	*cnt = 0;
+
+	if (xdp_is_metadata_kfunc_id(insn->imm)) {
+		if (!bpf_prog_is_dev_bound(env->prog->aux)) {
+			verbose(env, "metadata kfuncs require device-bound program\n");
+			return -EINVAL;
+		}
+
+		if (bpf_prog_is_offloaded(env->prog->aux)) {
+			verbose(env, "metadata kfuncs can't be offloaded\n");
+			return -EINVAL;
+		}
+
+		xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
+		if (xdp_kfunc) {
+			insn->imm = BPF_CALL_IMM(xdp_kfunc);
+			return 0;
+		}
+
+		/* fallback to default kfunc when not supported by netdev */
+	}
+
 	/* insn->imm has the btf func_id. Replace it with
 	 * an address (relative to __bpf_call_base).
 	 */
@@ -15495,7 +15518,6 @@  static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		return -EFAULT;
 	}
 
-	*cnt = 0;
 	insn->imm = desc->imm;
 	if (insn->off)
 		return 0;
@@ -16502,6 +16524,11 @@  int bpf_check_attach_target(struct bpf_verifier_log *log,
 	if (tgt_prog) {
 		struct bpf_prog_aux *aux = tgt_prog->aux;
 
+		if (bpf_prog_is_dev_bound(tgt_prog->aux)) {
+			bpf_log(log, "Replacing device-bound programs not supported\n");
+			return -EINVAL;
+		}
+
 		for (i = 0; i < aux->func_info_cnt; i++)
 			if (aux->func_info[i].type_id == btf_id) {
 				subprog = i;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 844c9d99dc0e..b0d4080249d7 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -4,6 +4,7 @@ 
  * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
  */
 #include <linux/bpf.h>
+#include <linux/btf_ids.h>
 #include <linux/filter.h>
 #include <linux/types.h>
 #include <linux/mm.h>
@@ -709,3 +710,46 @@  struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
 
 	return nxdpf;
 }
+
+noinline int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
+{
+	return -EOPNOTSUPP;
+}
+
+noinline int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
+{
+	return -EOPNOTSUPP;
+}
+
+BTF_SET8_START(xdp_metadata_kfunc_ids)
+#define XDP_METADATA_KFUNC(name, str) BTF_ID_FLAGS(func, str, 0)
+XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+BTF_SET8_END(xdp_metadata_kfunc_ids)
+
+static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
+	.owner = THIS_MODULE,
+	.set   = &xdp_metadata_kfunc_ids,
+};
+
+BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted)
+#define XDP_METADATA_KFUNC(name, str) BTF_ID(func, str)
+XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+
+u32 xdp_metadata_kfunc_id(int id)
+{
+	/* xdp_metadata_kfunc_ids is sorted and can't be used */
+	return xdp_metadata_kfunc_ids_unsorted[id];
+}
+
+bool xdp_is_metadata_kfunc_id(u32 btf_id)
+{
+	return btf_id_set8_contains(&xdp_metadata_kfunc_ids, btf_id);
+}
+
+static int __init xdp_metadata_init(void)
+{
+	return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &xdp_metadata_kfunc_set);
+}
+late_initcall(xdp_metadata_init);