[bpf-next,v2,3/4] libbpf: add low level TC-BPF API

Message ID	20210419121811.117400-4-memxor@gmail.com (mailing list archive)
State	Superseded
Delegated to:	BPF
Headers	show Return-Path: <bpf-owner@kernel.org> From: Kumar Kartikeya Dwivedi <memxor@gmail.com> To: bpf@vger.kernel.org Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>, Toke Høiland-Jørgensen <toke@redhat.com>, Alexei Starovoitov <ast@kernel.org>, Daniel Borkmann <daniel@iogearbox.net>, Andrii Nakryiko <andrii@kernel.org>, Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>, Yonghong Song <yhs@fb.com>, John Fastabend <john.fastabend@gmail.com>, KP Singh <kpsingh@kernel.org>, "David S. Miller" <davem@davemloft.net>, Jakub Kicinski <kuba@kernel.org>, Jesper Dangaard Brouer <hawk@kernel.org>, Jesper Dangaard Brouer <brouer@redhat.com>, linux-kernel@vger.kernel.org, netdev@vger.kernel.org Subject: [PATCH bpf-next v2 3/4] libbpf: add low level TC-BPF API Date: Mon, 19 Apr 2021 17:48:10 +0530 Message-Id: <20210419121811.117400-4-memxor@gmail.com> In-Reply-To: <20210419121811.117400-1-memxor@gmail.com> References: <20210419121811.117400-1-memxor@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	Add TC-BPF API | expand: [bpf-next,v2,0/4] Add TC-BPF API; [bpf-next,v2,1/4] tools: pkt_cls.h: sync with kernel sources; [bpf-next,v2,2/4] libbpf: add helpers for preparing netlink attributes; [bpf-next,v2,3/4] libbpf: add low level TC-BPF API; [bpf-next,v2,4/4] libbpf: add selftests for TC-BPF API

Context	Check	Description
netdev/cover_letter	success	Link
netdev/fixes_present	success	Link
netdev/patch_count	success	Link
netdev/tree_selection	success	Clearly marked for bpf-next
netdev/subject_prefix	success	Link
netdev/cc_maintainers	success	CCed 10 of 10 maintainers
netdev/source_inline	success	Was 0 now: 0
netdev/verify_signedoff	success	Link
netdev/module_param	success	Was 0 now: 0
netdev/build_32bit	success	Errors and warnings before: 0 this patch: 0
netdev/kdoc	success	Errors and warnings before: 0 this patch: 0
netdev/verify_fixes	success	Link
netdev/checkpatch	fail	CHECK: Please don't use multiple blank lines; ERROR: space prohibited before that ':' (ctx:WxV); WARNING: Block comments use a trailing */ on a separate line; WARNING: line length of 81 exceeds 80 columns
netdev/build_allmodconfig_warn	success	Errors and warnings before: 0 this patch: 0
netdev/header_inline	success	Link

diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index bec4e6a6e31d..2f4a2036cb74 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -16,6 +16,9 @@ #include <stdbool.h> #include <sys/types.h> // for size_t #include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include <linux/pkt_sched.h> +#include <linux/tc_act/tc_bpf.h> #include "libbpf_common.h" @@ -775,6 +778,55 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filen LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); +/* Convenience macros for the clsact attach hooks */ +#define BPF_TC_CLSACT_INGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS) +#define BPF_TC_CLSACT_EGRESS TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS) + +struct bpf_tc_cls_opts { + size_t sz; + __u32 protocol; + __u32 handle; + __u32 chain_index; + __u32 priority; + __u32 class_id; + size_t :0; +}; + +#define bpf_tc_cls_opts__last_field class_id + +/* Acts as a handle for an attached filter */ +struct bpf_tc_cls_attach_id { + __u32 protocol; + __u32 chain_index; + __u32 handle; + __u32 priority; +}; + +struct bpf_tc_cls_info { + struct bpf_tc_cls_attach_id id; + __u32 prog_id; + __u8 tag[BPF_TAG_SIZE]; + __u32 class_id; + __u32 bpf_flags; + __u32 bpf_flags_gen; +}; + +/* id is out parameter that will be written to, it must not be NULL */ +LIBBPF_API int bpf_tc_cls_attach(int fd, __u32 ifindex, __u32 parent_id, + const struct bpf_tc_cls_opts *opts, + struct bpf_tc_cls_attach_id *id); +LIBBPF_API int bpf_tc_cls_change(int fd, __u32 ifindex, __u32 parent_id, + const struct bpf_tc_cls_opts *opts, + struct bpf_tc_cls_attach_id *id); +LIBBPF_API int bpf_tc_cls_replace(int fd, __u32 ifindex, __u32 parent_id, + const struct bpf_tc_cls_opts *opts, + struct bpf_tc_cls_attach_id *id); +LIBBPF_API int bpf_tc_cls_detach(__u32 ifindex, __u32 parent_id, + const struct bpf_tc_cls_attach_id *id); +LIBBPF_API int bpf_tc_cls_get_info(int fd, __u32 
ifindex, __u32 parent_id, + const struct bpf_tc_cls_opts *opts, + struct bpf_tc_cls_info *info); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index b9b29baf1df8..52e5de1e82ea 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -361,4 +361,9 @@ LIBBPF_0.4.0 { bpf_linker__new; bpf_map__inner_map; bpf_object__set_kversion; + bpf_tc_cls_attach; + bpf_tc_cls_change; + bpf_tc_cls_detach; + bpf_tc_cls_replace; + bpf_tc_cls_get_info; } LIBBPF_0.3.0; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index c79e30484e81..93cc1e027065 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -4,7 +4,11 @@ #include <stdlib.h> #include <memory.h> #include <unistd.h> +#include <inttypes.h> +#include <arpa/inet.h> #include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include <linux/if_ether.h> #include <linux/rtnetlink.h> #include <sys/socket.h> #include <errno.h> @@ -131,6 +135,41 @@ static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, return ret; } +static int tc_setup_clsact_excl(int sock, __u32 nl_pid, __u32 ifindex) +{ + int seq = 0, ret = 0; + struct { + struct nlmsghdr nh; + struct tcmsg t; + char buf[256]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.nh.nlmsg_flags = + NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK | NLM_F_EXCL; + req.nh.nlmsg_type = RTM_NEWQDISC; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.t.tcm_family = AF_UNSPEC; + req.t.tcm_ifindex = ifindex; + req.t.tcm_parent = TC_H_CLSACT; + req.t.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0); + + ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "clsact", + sizeof("clsact")); + if (ret < 0) + return ret; + + ret = send(sock, &req.nh, req.nh.nlmsg_len, 0); + if (ret < 0) + return ret; + + ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL); + + return ret; +} + static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int 
old_fd, __u32 flags) { @@ -344,6 +383,20 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) return ret; } +static int bpf_nl_get_ext(struct nlmsghdr *nh, int sock, unsigned int nl_pid, + __dump_nlmsg_t dump_link_nlmsg_p, + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) +{ + int seq = time(NULL); + + nh->nlmsg_seq = seq; + if (send(sock, nh, nh->nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, dump_link_nlmsg_p, + dump_link_nlmsg, cookie); +} + int libbpf_nl_get_link(int sock, unsigned int nl_pid, libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) { @@ -356,12 +409,324 @@ int libbpf_nl_get_link(int sock, unsigned int nl_pid, .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, .ifm.ifi_family = AF_PACKET, }; - int seq = time(NULL); - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) - return -errno; + return bpf_nl_get_ext(&req.nlh, sock, nl_pid, __dump_link_nlmsg, + dump_link_nlmsg, cookie); +} - return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg, - dump_link_nlmsg, cookie); +static int tc_bpf_add_fd_and_name(struct nlmsghdr *nh, size_t maxsz, int fd) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + char name[256] = {}; + int len, ret; + + ret = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (ret < 0) + return ret; + + ret = nlattr_add(nh, maxsz, TCA_BPF_FD, &fd, sizeof(fd)); + if (ret < 0) + return ret; + + len = snprintf(name, sizeof(name), "%s:[%" PRIu32 "]", info.name, + info.id); + if (len < 0 || len >= sizeof(name)) + return len < 0 ? 
-EINVAL : -ENAMETOOLONG; + + return nlattr_add(nh, maxsz, TCA_BPF_NAME, name, len + 1); +} + +struct pass_info { + struct bpf_tc_cls_info *info; + __u32 prog_id; +}; + +static int cls_get_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn, + void *cookie); + +static int tc_cls_bpf_modify(int fd, int cmd, unsigned int flags, __u32 ifindex, + __u32 parent_id, const struct bpf_tc_cls_opts *opts, + __dump_nlmsg_t fn, struct bpf_tc_cls_attach_id *id) +{ + struct bpf_tc_cls_info info = {}; + unsigned int bpf_flags = 0; + __u32 nl_pid = 0, protocol; + int sock, seq = 0, ret; + struct nlattr *nla; + struct { + struct nlmsghdr nh; + struct tcmsg t; + char buf[256]; + } req; + + if (OPTS_GET(opts, priority, 0) > 0xFFFF) + return -EINVAL; + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + if ((parent_id == BPF_TC_CLSACT_INGRESS || + parent_id == BPF_TC_CLSACT_EGRESS) && + flags & NLM_F_CREATE) { + ret = tc_setup_clsact_excl(sock, nl_pid, ifindex); + /* attachment can still fail if ingress qdisc is installed, and + * we're trying attach on egress as parent */ + if (ret < 0 && ret != -EEXIST) + goto end; + } + + protocol = OPTS_GET(opts, protocol, 0) ?: ETH_P_ALL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags; + req.nh.nlmsg_type = cmd; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.t.tcm_family = AF_UNSPEC; + req.t.tcm_handle = OPTS_GET(opts, handle, 0); + req.t.tcm_parent = parent_id; + req.t.tcm_ifindex = ifindex; + req.t.tcm_info = + TC_H_MAKE(OPTS_GET(opts, priority, 0UL) << 16, htons(protocol)); + + if (OPTS_HAS(opts, chain_index)) { + ret = nlattr_add(&req.nh, sizeof(req), TCA_CHAIN, + &opts->chain_index, sizeof(opts->chain_index)); + if (ret < 0) + goto end; + } + + ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf")); + if (ret < 0) + goto end; + + nla = nlattr_begin_nested(&req.nh, sizeof(req), TCA_OPTIONS); + if 
(!nla) { + ret = -EMSGSIZE; + goto end; + } + + if (OPTS_GET(opts, class_id, TC_H_UNSPEC)) { + ret = nlattr_add(&req.nh, sizeof(req), TCA_BPF_CLASSID, + &opts->class_id, sizeof(opts->class_id)); + if (ret < 0) + goto end; + } + + if (cmd != RTM_DELTFILTER) { + ret = tc_bpf_add_fd_and_name(&req.nh, sizeof(req), fd); + if (ret < 0) + goto end; + + /* direct action is always set */ + bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT; + ret = nlattr_add(&req.nh, sizeof(req), TCA_BPF_FLAGS, + &bpf_flags, sizeof(bpf_flags)); + if (ret < 0) + goto end; + } + + nlattr_end_nested(&req.nh, nla); + + ret = send(sock, &req.nh, req.nh.nlmsg_len, 0); + if (ret < 0) + goto end; + + ret = bpf_netlink_recv(sock, nl_pid, seq, fn, NULL, + &(struct pass_info){ &info, 0 }); + + if (fn) + *id = info.id; + +end: + close(sock); + return ret; +} + +int bpf_tc_cls_attach(int fd, __u32 ifindex, __u32 parent_id, + const struct bpf_tc_cls_opts *opts, + struct bpf_tc_cls_attach_id *id) +{ + if (fd < 0 || !OPTS_VALID(opts, bpf_tc_cls_opts) || !id) + return -EINVAL; + + return tc_cls_bpf_modify(fd, RTM_NEWTFILTER, + NLM_F_ECHO | NLM_F_EXCL | NLM_F_CREATE, + ifindex, parent_id, opts, cls_get_info, id); +} + +int bpf_tc_cls_change(int fd, __u32 ifindex, __u32 parent_id, + const struct bpf_tc_cls_opts *opts, + struct bpf_tc_cls_attach_id *id) +{ + if (fd < 0 || !OPTS_VALID(opts, bpf_tc_cls_opts) || !id) + return -EINVAL; + + return tc_cls_bpf_modify(fd, RTM_NEWTFILTER, NLM_F_ECHO, ifindex, + parent_id, opts, cls_get_info, id); +} + +int bpf_tc_cls_replace(int fd, __u32 ifindex, __u32 parent_id, + const struct bpf_tc_cls_opts *opts, + struct bpf_tc_cls_attach_id *id) +{ + if (fd < 0 || !OPTS_VALID(opts, bpf_tc_cls_opts) || !id) + return -EINVAL; + + return tc_cls_bpf_modify(fd, RTM_NEWTFILTER, NLM_F_ECHO | NLM_F_CREATE, + ifindex, parent_id, opts, cls_get_info, id); +} + +int bpf_tc_cls_detach(__u32 ifindex, __u32 parent_id, + const struct bpf_tc_cls_attach_id *id) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_cls_opts, opts, 
0); + + if (!id) + return -EINVAL; + + opts.protocol = id->protocol; + opts.chain_index = id->chain_index; + opts.handle = id->handle; + opts.priority = id->priority; + + return tc_cls_bpf_modify(-1, RTM_DELTFILTER, 0, ifindex, parent_id, + &opts, NULL, NULL); +} + +static int __cls_get_info(void *cookie, void *msg, struct nlattr **tb) +{ + struct nlattr *tbb[TCA_BPF_MAX + 1]; + struct pass_info *cinfo = cookie; + struct bpf_tc_cls_info *info; + struct tcmsg *t = msg; + __u32 prog_id; + + info = cinfo->info; + + if (!tb[TCA_OPTIONS]) + return 0; + + libbpf_nla_parse_nested(tbb, TCA_BPF_MAX, tb[TCA_OPTIONS], NULL); + if (!tbb[TCA_BPF_ID]) + return 0; + + prog_id = libbpf_nla_getattr_u32(tbb[TCA_BPF_ID]); + if (cinfo->prog_id && cinfo->prog_id != prog_id) + return 0; + + info->id.protocol = ntohs(TC_H_MIN(t->tcm_info)); + info->id.priority = TC_H_MAJ(t->tcm_info) >> 16; + info->id.handle = t->tcm_handle; + + if (tb[TCA_CHAIN]) + info->id.chain_index = libbpf_nla_getattr_u32(tb[TCA_CHAIN]); + else + info->id.chain_index = 0; + + if (tbb[TCA_BPF_FLAGS]) + info->bpf_flags = libbpf_nla_getattr_u32(tbb[TCA_BPF_FLAGS]); + + if (tbb[TCA_BPF_FLAGS_GEN]) + info->bpf_flags_gen = + libbpf_nla_getattr_u32(tbb[TCA_BPF_FLAGS_GEN]); + + if (tbb[TCA_BPF_ID]) + info->prog_id = libbpf_nla_getattr_u32(tbb[TCA_BPF_ID]); + + if (tbb[TCA_BPF_TAG]) + memcpy(info->tag, libbpf_nla_getattr_str(tbb[TCA_BPF_TAG]), + sizeof(info->tag)); + + if (tbb[TCA_BPF_CLASSID]) + info->class_id = libbpf_nla_getattr_u32(tbb[TCA_BPF_CLASSID]); + + return 1; +} + +static int cls_get_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn, + void *cookie) +{ + struct tcmsg *t = NLMSG_DATA(nh); + struct nlattr *tb[TCA_MAX + 1]; + + libbpf_nla_parse(tb, TCA_MAX, + (struct nlattr *)((char *)t + NLMSG_ALIGN(sizeof(*t))), + NLMSG_PAYLOAD(nh, sizeof(*t)), NULL); + if (!tb[TCA_KIND]) + return -EINVAL; + + return __cls_get_info(cookie, t, tb); +} + +static int tc_cls_get_info(int fd, __u32 ifindex, __u32 parent_id, + const 
struct bpf_tc_cls_opts *opts, + struct bpf_tc_cls_info *info) +{ + __u32 nl_pid = 0, protocol, info_len = sizeof(struct bpf_prog_info); + struct bpf_prog_info prog_info = {}; + int sock, ret; + struct { + struct nlmsghdr nh; + struct tcmsg t; + char buf[256]; + } req = { + .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nh.nlmsg_type = RTM_GETTFILTER, + .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, + .t.tcm_family = AF_UNSPEC, + }; + + + if (!OPTS_VALID(opts, bpf_tc_cls_opts)) + return -EINVAL; + + protocol = OPTS_GET(opts, protocol, 0) ?: ETH_P_ALL; + + req.t.tcm_parent = parent_id; + req.t.tcm_ifindex = ifindex; + req.t.tcm_handle = OPTS_GET(opts, handle, 0); + req.t.tcm_info = + TC_H_MAKE(OPTS_GET(opts, priority, 0UL) << 16, htons(protocol)); + + ret = bpf_obj_get_info_by_fd(fd, &prog_info, &info_len); + if (ret < 0) + return ret; + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf")); + if (ret < 0) + goto end; + + if (OPTS_HAS(opts, chain_index)) { + ret = nlattr_add(&req.nh, sizeof(req), TCA_CHAIN, + &opts->chain_index, sizeof(opts->chain_index)); + if (ret < 0) + goto end; + } + + req.nh.nlmsg_seq = time(NULL); + + ret = bpf_nl_get_ext(&req.nh, sock, nl_pid, cls_get_info, NULL, + &(struct pass_info){ info, prog_info.id }); + if (ret < 0) + goto end; + /* 1 denotes a match */ + ret = ret == 1 ? 0 : -ESRCH; +end: + close(sock); + return ret; +} + +int bpf_tc_cls_get_info(int fd, __u32 ifindex, __u32 parent_id, + const struct bpf_tc_cls_opts *opts, + struct bpf_tc_cls_info *info) +{ + return tc_cls_get_info(fd, ifindex, parent_id, opts, info); }

[bpf-next,v2,3/4] libbpf: add low level TC-BPF API

Checks

Commit Message

Comments

Patch