diff mbox series

[bpf-next,2/8] meta, bpf: Add bpf link support for meta device

Message ID 20230926055913.9859-3-daniel@iogearbox.net (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Add bpf programmable device | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3080 this patch: 3080
netdev/cc_maintainers warning 11 maintainers not CCed: martin.lau@linux.dev jolsa@kernel.org pabeni@redhat.com haoluo@google.com davem@davemloft.net sdf@google.com edumazet@google.com yonghong.song@linux.dev kpsingh@kernel.org song@kernel.org kuba@kernel.org
netdev/build_clang success Errors and warnings before: 1519 this patch: 1519
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 3173 this patch: 3173
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 314 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-6 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-7 success Logs for veristat

Commit Message

Daniel Borkmann Sept. 26, 2023, 5:59 a.m. UTC
This adds BPF link support for the meta device (BPF_LINK_TYPE_META). As
with tcx or XDP, the BPF link for meta contains the device.

The bpf_mprog API has been reused for its implementation. For details, see
also commit e420bed0250 ("bpf: Add fd-based tcx multi-prog infra with link
support").

This is now the second user of bpf_mprog after tcx, and in the meta case
the implementation is also a bit more straightforward since it does not
need to deal with miniq.

The UAPI extensions for the BPF_LINK_CREATE command are similar to those
for tcx, that is, relative_{fd,id} and expected_revision.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/meta.c             | 211 ++++++++++++++++++++++++++++++++-
 include/net/meta.h             |   7 ++
 include/uapi/linux/bpf.h       |  11 ++
 kernel/bpf/syscall.c           |   2 +-
 tools/include/uapi/linux/bpf.h |  11 ++
 5 files changed, 240 insertions(+), 2 deletions(-)

Comments

Andrii Nakryiko Sept. 28, 2023, 12:12 a.m. UTC | #1
On Mon, Sep 25, 2023 at 10:59 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> This adds BPF link support for meta device (BPF_LINK_TYPE_META). Similar
> as with tcx or XDP, the BPF link for meta contains the device.
>
> The bpf_mprog API has been reused for its implementation. For details, see
> also commit e420bed0250 ("bpf: Add fd-based tcx multi-prog infra with link
> support").
>
> This is now the second user of bpf_mprog after tcx, and in meta case the
> implementation is also a bit more straight forward since it does not need
> to deal with miniq.
>
> The UAPI extensions for the BPF_LINK_CREATE command are similar as for tcx,
> that is, relative_{fd,id} and expected_revision.
>
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> ---
>  drivers/net/meta.c             | 211 ++++++++++++++++++++++++++++++++-
>  include/net/meta.h             |   7 ++
>  include/uapi/linux/bpf.h       |  11 ++
>  kernel/bpf/syscall.c           |   2 +-
>  tools/include/uapi/linux/bpf.h |  11 ++
>  5 files changed, 240 insertions(+), 2 deletions(-)
>

[...]

> diff --git a/include/net/meta.h b/include/net/meta.h
> index 20fc61d05970..f1abe1d6d02d 100644
> --- a/include/net/meta.h
> +++ b/include/net/meta.h
> @@ -7,6 +7,7 @@
>
>  #ifdef CONFIG_META
>  int meta_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
> +int meta_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
>  int meta_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
>  int meta_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
>  #else
> @@ -16,6 +17,12 @@ static inline int meta_prog_attach(const union bpf_attr *attr,
>         return -EINVAL;
>  }
>
> +static inline int meta_link_attach(const union bpf_attr *attr,
> +                                  struct bpf_prog *prog)
> +{
> +       return -EINVAL;
> +}
> +
>  static inline int meta_prog_detach(const union bpf_attr *attr,
>                                    struct bpf_prog *prog)
>  {
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 00a875720e84..fd069f285fbc 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1068,6 +1068,7 @@ enum bpf_link_type {
>         BPF_LINK_TYPE_NETFILTER = 10,
>         BPF_LINK_TYPE_TCX = 11,
>         BPF_LINK_TYPE_UPROBE_MULTI = 12,
> +       BPF_LINK_TYPE_META = 13,

it's not just some completely universal "meta" device, it's
specifically networking meta-device, is that right? so at least in
UAPI I think we should stay away from using super-generic "meta"
words, and do something like "net_meta" or "meta_net" or whatnot, but
indicate that this is networking stuff. WDYT?


>         MAX_BPF_LINK_TYPE,
>  };
>
> @@ -1653,6 +1654,13 @@ union bpf_attr {
>                                 __u32           flags;
>                                 __u32           pid;
>                         } uprobe_multi;
> +                       struct {
> +                               union {
> +                                       __u32   relative_fd;
> +                                       __u32   relative_id;
> +                               };
> +                               __u64           expected_revision;
> +                       } meta;
>                 };
>         } link_create;
>
> @@ -6564,6 +6572,9 @@ struct bpf_link_info {
>                         __u32 ifindex;
>                         __u32 attach_type;
>                 } tcx;
> +               struct {
> +                       __u32 ifindex;
> +               } meta;
>         };
>  } __attribute__((aligned(8)));
>
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 51baf4355c39..b689da4de280 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -4969,7 +4969,7 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
>                     attr->link_create.attach_type == BPF_TCX_EGRESS)
>                         ret = tcx_link_attach(attr, prog);
>                 else
> -                       ret = -EINVAL;
> +                       ret = meta_link_attach(attr, prog);
>                 break;
>         case BPF_PROG_TYPE_NETFILTER:
>                 ret = bpf_nf_link_attach(attr, prog);
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 00a875720e84..fd069f285fbc 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1068,6 +1068,7 @@ enum bpf_link_type {
>         BPF_LINK_TYPE_NETFILTER = 10,
>         BPF_LINK_TYPE_TCX = 11,
>         BPF_LINK_TYPE_UPROBE_MULTI = 12,
> +       BPF_LINK_TYPE_META = 13,
>         MAX_BPF_LINK_TYPE,
>  };
>
> @@ -1653,6 +1654,13 @@ union bpf_attr {
>                                 __u32           flags;
>                                 __u32           pid;
>                         } uprobe_multi;
> +                       struct {
> +                               union {
> +                                       __u32   relative_fd;
> +                                       __u32   relative_id;
> +                               };
> +                               __u64           expected_revision;
> +                       } meta;
>                 };
>         } link_create;
>
> @@ -6564,6 +6572,9 @@ struct bpf_link_info {
>                         __u32 ifindex;
>                         __u32 attach_type;
>                 } tcx;
> +               struct {
> +                       __u32 ifindex;
> +               } meta;
>         };
>  } __attribute__((aligned(8)));
>
> --
> 2.34.1
>
diff mbox series

Patch

diff --git a/drivers/net/meta.c b/drivers/net/meta.c
index e464f547b0a6..8cb39281c455 100644
--- a/drivers/net/meta.c
+++ b/drivers/net/meta.c
@@ -27,6 +27,11 @@  struct meta {
 	u32 headroom;
 };
 
+struct meta_link {
+	struct bpf_link link;
+	struct net_device *dev;
+};
+
 static void meta_scrub_minimum(struct sk_buff *skb)
 {
 	skb->skb_iif = 0;
@@ -576,6 +581,207 @@  int meta_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
 	return ret;
 }
 
+static struct meta_link *meta_link(struct bpf_link *link)
+{
+	return container_of(link, struct meta_link, link);
+}
+
+static const struct meta_link *meta_link_const(const struct bpf_link *link)
+{
+	return meta_link((struct bpf_link *)link);
+}
+
+static int meta_link_prog_attach(struct bpf_link *link, u32 flags,
+				 u32 id_or_fd, u64 revision)
+{
+	struct meta_link *meta = meta_link(link);
+	struct bpf_mprog_entry *entry, *entry_new;
+	struct net_device *dev = meta->dev;
+	int ret;
+
+	ASSERT_RTNL();
+	entry = meta_entry_fetch(dev, true);
+	ret = bpf_mprog_attach(entry, &entry_new, link->prog, link, NULL, flags,
+			       id_or_fd, revision);
+	if (!ret) {
+		if (entry != entry_new) {
+			meta_entry_update(dev, entry_new);
+			meta_entry_sync();
+		}
+		bpf_mprog_commit(entry);
+	}
+	return ret;
+}
+
+static void meta_link_release(struct bpf_link *link)
+{
+	struct meta_link *meta = meta_link(link);
+	struct bpf_mprog_entry *entry, *entry_new;
+	struct net_device *dev;
+	int ret = 0;
+
+	rtnl_lock();
+	dev = meta->dev;
+	if (!dev)
+		goto out;
+	entry = meta_entry_fetch(dev, false);
+	if (!entry) {
+		ret = -ENOENT;
+		goto out;
+	}
+	ret = bpf_mprog_detach(entry, &entry_new, link->prog, link, 0, 0, 0);
+	if (!ret) {
+		if (!bpf_mprog_total(entry_new))
+			entry_new = NULL;
+		meta_entry_update(dev, entry_new);
+		meta_entry_sync();
+		bpf_mprog_commit(entry);
+		meta->dev = NULL;
+	}
+out:
+	WARN_ON_ONCE(ret);
+	rtnl_unlock();
+}
+
+static int meta_link_update(struct bpf_link *link, struct bpf_prog *nprog,
+			    struct bpf_prog *oprog)
+{
+	struct meta_link *meta = meta_link(link);
+	struct bpf_mprog_entry *entry, *entry_new;
+	struct net_device *dev;
+	int ret = 0;
+
+	rtnl_lock();
+	dev = meta->dev;
+	if (!dev) {
+		ret = -ENOLINK;
+		goto out;
+	}
+	if (oprog && link->prog != oprog) {
+		ret = -EPERM;
+		goto out;
+	}
+	oprog = link->prog;
+	if (oprog == nprog) {
+		bpf_prog_put(nprog);
+		goto out;
+	}
+	entry = meta_entry_fetch(dev, false);
+	if (!entry) {
+		ret = -ENOENT;
+		goto out;
+	}
+	ret = bpf_mprog_attach(entry, &entry_new, nprog, link, oprog,
+			       BPF_F_REPLACE | BPF_F_ID,
+			       link->prog->aux->id, 0);
+	if (!ret) {
+		WARN_ON_ONCE(entry != entry_new);
+		oprog = xchg(&link->prog, nprog);
+		bpf_prog_put(oprog);
+		bpf_mprog_commit(entry);
+	}
+out:
+	rtnl_unlock();
+	return ret;
+}
+
+static void meta_link_dealloc(struct bpf_link *link)
+{
+	kfree(meta_link(link));
+}
+
+static void meta_link_fdinfo(const struct bpf_link *link, struct seq_file *seq)
+{
+	const struct meta_link *meta = meta_link_const(link);
+	u32 ifindex = 0;
+
+	rtnl_lock();
+	if (meta->dev)
+		ifindex = meta->dev->ifindex;
+	rtnl_unlock();
+
+	seq_printf(seq, "ifindex:\t%u\n", ifindex);
+}
+
+static int meta_link_fill_info(const struct bpf_link *link,
+			       struct bpf_link_info *info)
+{
+	const struct meta_link *meta = meta_link_const(link);
+	u32 ifindex = 0;
+
+	rtnl_lock();
+	if (meta->dev)
+		ifindex = meta->dev->ifindex;
+	rtnl_unlock();
+
+	info->meta.ifindex = ifindex;
+	return 0;
+}
+
+static int meta_link_detach(struct bpf_link *link)
+{
+	meta_link_release(link);
+	return 0;
+}
+
+static const struct bpf_link_ops meta_link_lops = {
+	.release	= meta_link_release,
+	.detach		= meta_link_detach,
+	.dealloc	= meta_link_dealloc,
+	.update_prog	= meta_link_update,
+	.show_fdinfo	= meta_link_fdinfo,
+	.fill_link_info	= meta_link_fill_info,
+};
+
+static int meta_link_init(struct meta_link *meta,
+			  struct bpf_link_primer *link_primer,
+			  struct net_device *dev, struct bpf_prog *prog)
+{
+	bpf_link_init(&meta->link, BPF_LINK_TYPE_META, &meta_link_lops, prog);
+	meta->dev = dev;
+	return bpf_link_prime(&meta->link, link_primer);
+}
+
+int meta_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	struct bpf_link_primer link_primer;
+	struct net_device *dev;
+	struct meta_link *meta;
+	int ret;
+
+	rtnl_lock();
+	dev = meta_dev_fetch(current->nsproxy->net_ns,
+			     attr->link_create.target_ifindex,
+			     attr->link_create.attach_type);
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		goto out;
+	}
+	meta = kzalloc(sizeof(*meta), GFP_USER);
+	if (!meta) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	ret = meta_link_init(meta, &link_primer, dev, prog);
+	if (ret) {
+		kfree(meta);
+		goto out;
+	}
+	ret = meta_link_prog_attach(&meta->link,
+				    attr->link_create.flags,
+				    attr->link_create.meta.relative_fd,
+				    attr->link_create.meta.expected_revision);
+	if (ret) {
+		meta->dev = NULL;
+		bpf_link_cleanup(&link_primer);
+		goto out;
+	}
+	ret = bpf_link_settle(&link_primer);
+out:
+	rtnl_unlock();
+	return ret;
+}
+
 static void meta_release_all(struct net_device *dev)
 {
 	struct bpf_mprog_entry *entry;
@@ -589,7 +795,10 @@  static void meta_release_all(struct net_device *dev)
 	meta_entry_update(dev, NULL);
 	meta_entry_sync();
 	bpf_mprog_foreach_tuple(entry, fp, cp, tuple) {
-		bpf_prog_put(tuple.prog);
+		if (tuple.link)
+			meta_link(tuple.link)->dev = NULL;
+		else
+			bpf_prog_put(tuple.prog);
 	}
 }
 
diff --git a/include/net/meta.h b/include/net/meta.h
index 20fc61d05970..f1abe1d6d02d 100644
--- a/include/net/meta.h
+++ b/include/net/meta.h
@@ -7,6 +7,7 @@ 
 
 #ifdef CONFIG_META
 int meta_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int meta_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int meta_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
 int meta_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
 #else
@@ -16,6 +17,12 @@  static inline int meta_prog_attach(const union bpf_attr *attr,
 	return -EINVAL;
 }
 
+static inline int meta_link_attach(const union bpf_attr *attr,
+				   struct bpf_prog *prog)
+{
+	return -EINVAL;
+}
+
 static inline int meta_prog_detach(const union bpf_attr *attr,
 				   struct bpf_prog *prog)
 {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 00a875720e84..fd069f285fbc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1068,6 +1068,7 @@  enum bpf_link_type {
 	BPF_LINK_TYPE_NETFILTER = 10,
 	BPF_LINK_TYPE_TCX = 11,
 	BPF_LINK_TYPE_UPROBE_MULTI = 12,
+	BPF_LINK_TYPE_META = 13,
 	MAX_BPF_LINK_TYPE,
 };
 
@@ -1653,6 +1654,13 @@  union bpf_attr {
 				__u32		flags;
 				__u32		pid;
 			} uprobe_multi;
+			struct {
+				union {
+					__u32	relative_fd;
+					__u32	relative_id;
+				};
+				__u64		expected_revision;
+			} meta;
 		};
 	} link_create;
 
@@ -6564,6 +6572,9 @@  struct bpf_link_info {
 			__u32 ifindex;
 			__u32 attach_type;
 		} tcx;
+		struct {
+			__u32 ifindex;
+		} meta;
 	};
 } __attribute__((aligned(8)));
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 51baf4355c39..b689da4de280 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4969,7 +4969,7 @@  static int link_create(union bpf_attr *attr, bpfptr_t uattr)
 		    attr->link_create.attach_type == BPF_TCX_EGRESS)
 			ret = tcx_link_attach(attr, prog);
 		else
-			ret = -EINVAL;
+			ret = meta_link_attach(attr, prog);
 		break;
 	case BPF_PROG_TYPE_NETFILTER:
 		ret = bpf_nf_link_attach(attr, prog);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 00a875720e84..fd069f285fbc 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1068,6 +1068,7 @@  enum bpf_link_type {
 	BPF_LINK_TYPE_NETFILTER = 10,
 	BPF_LINK_TYPE_TCX = 11,
 	BPF_LINK_TYPE_UPROBE_MULTI = 12,
+	BPF_LINK_TYPE_META = 13,
 	MAX_BPF_LINK_TYPE,
 };
 
@@ -1653,6 +1654,13 @@  union bpf_attr {
 				__u32		flags;
 				__u32		pid;
 			} uprobe_multi;
+			struct {
+				union {
+					__u32	relative_fd;
+					__u32	relative_id;
+				};
+				__u64		expected_revision;
+			} meta;
 		};
 	} link_create;
 
@@ -6564,6 +6572,9 @@  struct bpf_link_info {
 			__u32 ifindex;
 			__u32 attach_type;
 		} tcx;
+		struct {
+			__u32 ifindex;
+		} meta;
 	};
 } __attribute__((aligned(8)));