@@ -429,4 +429,59 @@ extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __weak __ksym;
)
#endif /* bpf_repeat */
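+
+/* Define a BPF static key, i.e. an INSN_SET map with the BPF_F_STATIC_KEY
+ * flag set. libbpf fills the map with the offsets of all branch
+ * instructions that test this key and freezes it before program load.
+ */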
+#define DEFINE_STATIC_KEY(NAME) \
+ struct { \
+ __uint(type, BPF_MAP_TYPE_INSN_SET); \
+ __type(key, __u32); \
+ __type(value, __u32); \
+ __uint(map_extra, BPF_F_STATIC_KEY); \
+ } NAME SEC(".maps")
+
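+/* Emit a branch that is a nop while the key is off: nop_or_gotol starts
+ * out as a nop and is patched to a goto when the key is turned on. The
+ * .static_keys entry records the PC-relative offsets of the instruction,
+ * of the jump target, and of the key map (whose low bit carries the
+ * branch polarity flag).
+ */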
+static __always_inline int __bpf_static_branch_nop(void *static_key)
+{
+ asm goto("1:\n\t"
+		"nop_or_gotol %l[l_yes]\n\t"
+ ".pushsection .static_keys, \"aw\"\n\t"
+ ".balign 8\n\t"
+ ".long 1b - .\n\t"
+ ".long %l[l_yes] - .\n\t"
+ ".quad %c0 - .\n\t"
+ ".popsection\n\t"
+ :: "i" (static_key)
+ :: l_yes);
+ return 0;
+l_yes:
+ return 1;
+}
+
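+/* Emit a branch with inverted polarity: gotol_or_nop starts out as a goto
+ * and is patched to a nop when the key is turned on. The "+ 1" below sets
+ * the polarity bit in the .static_keys entry's flags word.
+ */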
+static __always_inline int __bpf_static_branch_jump(void *static_key)
+{
+ asm goto("1:\n\t"
+		"gotol_or_nop %l[l_yes]\n\t"
+ ".pushsection .static_keys, \"aw\"\n\t"
+ ".balign 8\n\t"
+ ".long 1b - .\n\t"
+ ".long %l[l_yes] - .\n\t"
+ ".quad %c0 - . + 1\n\t"
+ ".popsection\n\t"
+ :: "i" (static_key)
+ :: l_yes);
+ return 0;
+l_yes:
+ return 1;
+}
+
+/*
+ * The bpf_static_branch_{unlikely,likely} macros provide a way to use BPF
+ * static keys in BPF programs in the same manner as this is done in the
+ * Linux kernel. The "unlikely" macro compiles to code in which the
+ * else-branch (key is off) is the fall-through path; the "likely" macro
+ * makes the if-branch the fall-through path.
+ */
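+
+/*
+ * A minimal usage sketch (the XDP program and return codes are
+ * illustrative only, not part of this API):
+ *
+ *	DEFINE_STATIC_KEY(key);
+ *
+ *	SEC("xdp")
+ *	int prog(struct xdp_md *ctx)
+ *	{
+ *		if (bpf_static_branch_unlikely(&key))
+ *			return XDP_DROP;	/* rarely taken path */
+ *		return XDP_PASS;		/* default hot path */
+ *	}
+ */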
+
+#define bpf_static_branch_unlikely(static_key) \
+ unlikely(__bpf_static_branch_nop(static_key))
+
+#define bpf_static_branch_likely(static_key) \
+ likely(!__bpf_static_branch_jump(static_key))
+
#endif
@@ -422,6 +422,17 @@ struct bpf_sec_def {
libbpf_prog_attach_fn_t prog_attach_fn;
};
+struct static_key_insn {
+ __u32 insn_offset;
+ __u32 jump_target;
+};
+
+struct static_key {
+ struct bpf_map *map;
+ struct static_key_insn *insns;
+ __u32 insns_cnt;
+};
+
/*
* bpf_prog should be a better name but it has been used in
* linux/filter.h.
@@ -494,6 +505,9 @@ struct bpf_program {
__u32 line_info_rec_size;
__u32 line_info_cnt;
__u32 prog_flags;
+
+ struct static_key *static_keys;
+ __u32 static_keys_cnt;
};
struct bpf_struct_ops {
@@ -523,6 +537,7 @@ struct bpf_struct_ops {
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
#define ARENA_SEC ".addr_space.1"
+#define STATIC_KEYS_SEC ".static_keys"
enum libbpf_map_type {
LIBBPF_MAP_UNSPEC,
@@ -656,6 +671,7 @@ struct elf_state {
Elf64_Ehdr *ehdr;
Elf_Data *symbols;
Elf_Data *arena_data;
+ Elf_Data *static_keys_data;
size_t shstrndx; /* section index for section name strings */
size_t strtabidx;
struct elf_sec_desc *secs;
@@ -666,6 +682,7 @@ struct elf_state {
int symbols_shndx;
bool has_st_ops;
int arena_data_shndx;
+ int static_keys_data_shndx;
};
struct usdt_manager;
@@ -763,6 +780,7 @@ void bpf_program__unload(struct bpf_program *prog)
zfree(&prog->func_info);
zfree(&prog->line_info);
+ zfree(&prog->static_keys);
}
static void bpf_program__exit(struct bpf_program *prog)
@@ -1895,6 +1913,213 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name)
return strdup(map_name);
}
+struct static_keys_table_entry {
+ __u32 insn_offset;
+ __u32 jump_target;
+ union {
+ __u64 map_ptr; /* map_ptr is always zero, as it is relocated */
+ __u64 flags; /* so we can reuse it to store flags */
+ };
+};
+
+static struct bpf_program *shndx_to_prog(struct bpf_object *obj,
+ size_t sec_idx,
+ struct static_keys_table_entry *entry)
+{
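+	/* .static_keys entries store byte offsets; BPF insns are 8 bytes */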
+ __u32 insn_offset = entry->insn_offset / 8;
+ __u32 jump_target = entry->jump_target / 8;
+ struct bpf_program *prog;
+ size_t i;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ if (prog->sec_idx != sec_idx)
+ continue;
+
+ if (insn_offset < prog->sec_insn_off ||
+ insn_offset >= prog->sec_insn_off + prog->sec_insn_cnt)
+ continue;
+
+ if (jump_target < prog->sec_insn_off ||
+ jump_target >= prog->sec_insn_off + prog->sec_insn_cnt) {
+			pr_warn("static key: insn offset %u is within program boundaries, jump target %u is not\n",
+				insn_offset, jump_target);
+ return NULL;
+ }
+
+ return prog;
+ }
+
+ return NULL;
+}
+
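+/* Find the program that contains the branch instruction of one
+ * .static_keys entry: the relocation written against the entry's
+ * insn_offset field points at the program's text section.
+ */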
+static struct bpf_program *find_prog_for_jump_entry(struct bpf_object *obj,
+ int nrels,
+ Elf_Data *relo_data,
+ __u32 entry_offset,
+ struct static_keys_table_entry *entry)
+{
+ struct bpf_program *prog;
+ Elf64_Rel *rel;
+ Elf64_Sym *sym;
+ int i;
+
+ for (i = 0; i < nrels; i++) {
+ rel = elf_rel_by_idx(relo_data, i);
+ if (!rel) {
+ pr_warn("static key: relo #%d: failed to get ELF relo\n", i);
+ return ERR_PTR(-LIBBPF_ERRNO__FORMAT);
+ }
+
+ if ((__u32)rel->r_offset != entry_offset)
+ continue;
+
+ sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
+ if (!sym) {
+			pr_warn("static key: relo #%d: symbol %zx not found\n",
+				i, (size_t)ELF64_R_SYM(rel->r_info));
+ return ERR_PTR(-LIBBPF_ERRNO__FORMAT);
+ }
+
+ prog = shndx_to_prog(obj, sym->st_shndx, entry);
+ if (!prog) {
+			pr_warn("static key: relo #%d: program for section %zx not found\n",
+				i, (size_t)sym->st_shndx);
+ return ERR_PTR(-LIBBPF_ERRNO__FORMAT);
+ }
+ return prog;
+ }
+ return ERR_PTR(-LIBBPF_ERRNO__FORMAT);
+}
+
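+/* Resolve the static key map referenced by one .static_keys entry: the
+ * relocation against the entry's map_ptr field names the map's symbol.
+ */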
+static struct bpf_map *find_map_for_jump_entry(struct bpf_object *obj,
+ int nrels,
+ Elf_Data *relo_data,
+ __u32 entry_offset)
+{
+ struct bpf_map *map;
+ const char *name;
+ Elf64_Rel *rel;
+ Elf64_Sym *sym;
+ int i;
+
+ for (i = 0; i < nrels; i++) {
+ rel = elf_rel_by_idx(relo_data, i);
+ if (!rel) {
+ pr_warn("static key: relo #%d: failed to get ELF relo\n", i);
+ return NULL;
+ }
+
+ if ((__u32)rel->r_offset != entry_offset)
+ continue;
+
+ sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
+ if (!sym) {
+			pr_warn("static key: relo #%d: symbol %zx not found\n",
+				i, (size_t)ELF64_R_SYM(rel->r_info));
+ return NULL;
+ }
+
+		name = elf_sym_str(obj, sym->st_name);
+		if (!name || !name[0]) {
+			pr_warn("static key: relo #%d: symbol name is missing or empty\n", i);
+			return NULL;
+		}
+
+		map = bpf_object__find_map_by_name(obj, name);
+		if (!map)
+			pr_warn("static key: relo #%d: map '%s' not found\n", i, name);
+		return map;
+ }
+ return NULL;
+}
+
+static struct static_key *find_static_key(struct bpf_program *prog, struct bpf_map *map)
+{
+ __u32 i;
+
+ for (i = 0; i < prog->static_keys_cnt; i++)
+ if (prog->static_keys[i].map == map)
+ return &prog->static_keys[i];
+
+ return NULL;
+}
+
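+/* Record one branch site for @map in @prog, creating the per-program
+ * static_key bookkeeping on first use. max_entries is bumped so that the
+ * INSN_SET map ends up with one slot per branch instruction.
+ */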
+static int add_static_key_insn(struct bpf_program *prog,
+ struct static_keys_table_entry *entry,
+ struct bpf_map *map)
+{
+ struct static_key_insn *insn;
+ struct static_key *key;
+ void *x;
+
+ key = find_static_key(prog, map);
+ if (!key) {
+ __u32 size_old = prog->static_keys_cnt * sizeof(*key);
+
+ x = realloc(prog->static_keys, size_old + sizeof(*key));
+ if (!x)
+ return -ENOMEM;
+
+ prog->static_keys = x;
+ prog->static_keys_cnt += 1;
+
+ key = x + size_old;
+ key->map = map;
+ key->insns = NULL;
+ key->insns_cnt = 0;
+ }
+
+ x = realloc(key->insns, (key->insns_cnt + 1) * sizeof(key->insns[0]));
+ if (!x)
+ return -ENOMEM;
+
+ key->insns = x;
+ insn = &key->insns[key->insns_cnt++];
+ insn->insn_offset = (entry->insn_offset / 8) - prog->sec_insn_off;
+ insn->jump_target = (entry->jump_target / 8) - prog->sec_insn_off;
+ key->map->def.max_entries += 1;
+
+ return 0;
+}
+
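+/* Collect all .static_keys entries and their relocations, attributing
+ * each branch site to a program and to a static key map.
+ */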
+static int
+bpf_object__collect_static_keys_relos(struct bpf_object *obj,
+ Elf64_Shdr *shdr,
+ Elf_Data *relo_data)
+{
+ Elf_Data *data = obj->efile.static_keys_data;
+ int nrels = shdr->sh_size / shdr->sh_entsize;
+ struct static_keys_table_entry *entries;
+ size_t i;
+ int err;
+
+ if (!data)
+ return 0;
+
+ entries = (void *)data->d_buf;
+ for (i = 0; i < data->d_size / sizeof(struct static_keys_table_entry); i++) {
+ __u32 entry_offset = i * sizeof(struct static_keys_table_entry);
+ struct bpf_program *prog;
+ struct bpf_map *map;
+
+ prog = find_prog_for_jump_entry(obj, nrels, relo_data, entry_offset, &entries[i]);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ map = find_map_for_jump_entry(obj, nrels, relo_data,
+ entry_offset + offsetof(struct static_keys_table_entry, map_ptr));
+ if (!map)
+ return -EINVAL;
+
+ err = add_static_key_insn(prog, &entries[i], map);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int
map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
@@ -3951,6 +4176,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
} else if (strcmp(name, ARENA_SEC) == 0) {
obj->efile.arena_data = data;
obj->efile.arena_data_shndx = idx;
+ } else if (strcmp(name, STATIC_KEYS_SEC) == 0) {
+ obj->efile.static_keys_data = data;
+ obj->efile.static_keys_data_shndx = idx;
} else {
pr_info("elf: skipping unrecognized data section(%d) %s\n",
idx, name);
@@ -3968,7 +4196,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
strcmp(name, ".rel?" STRUCT_OPS_SEC) &&
strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) &&
- strcmp(name, ".rel" MAPS_ELF_SEC)) {
+ strcmp(name, ".rel" MAPS_ELF_SEC) &&
+ strcmp(name, ".rel" STATIC_KEYS_SEC)) {
pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
idx, name, targ_sec_idx,
elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
@@ -5200,6 +5429,69 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
return 0;
}
+static struct static_key *
+bpf_object__find_static_key(struct bpf_object *obj, struct bpf_map *map)
+{
+	struct static_key *key;
+	size_t i;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ key = find_static_key(&obj->programs[i], map);
+ if (key)
+ return key;
+ }
+
+ return NULL;
+}
+
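+/* Populate a static key map with the instruction offsets of all branch
+ * sites that use it, then freeze the map so that its contents can no
+ * longer be changed from user space; the map fd is later passed to the
+ * kernel via fd_array on program load.
+ */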
+static int bpf_object__init_static_key_map(struct bpf_object *obj,
+ struct bpf_map *map)
+{
+ struct static_key *key;
+ __u32 map_key;
+ int err;
+ int i;
+
+ if (obj->gen_loader) {
+		pr_warn("static keys are not supported by gen_loader\n");
+ return libbpf_err(-ENOTSUP);
+ }
+
+ key = bpf_object__find_static_key(obj, map);
+ if (!key) {
+ pr_warn("map '%s': static key is not used by any program\n",
+ bpf_map__name(map));
+ return libbpf_err(-EINVAL);
+ }
+
+ if (key->insns_cnt != map->def.max_entries) {
+		pr_warn("map '%s': static key #entries and max_entries differ: %u != %u\n",
+			bpf_map__name(map), key->insns_cnt, map->def.max_entries);
+ return libbpf_err(-EINVAL);
+ }
+
+ for (i = 0; i < key->insns_cnt; i++) {
+ map_key = key->insns[i].insn_offset;
+ err = bpf_map_update_elem(map->fd, &i, &map_key, 0);
+ if (err) {
+ err = -errno;
+ pr_warn("map '%s': failed to set initial contents: %s\n",
+ bpf_map__name(map), errstr(err));
+ return err;
+ }
+ }
+
+ err = bpf_map_freeze(map->fd);
+ if (err) {
+ err = -errno;
+ pr_warn("map '%s': failed to freeze as read-only: %s\n",
+ bpf_map__name(map), errstr(err));
+ return err;
+ }
+
+ return 0;
+}
+
static void bpf_map__destroy(struct bpf_map *map);
static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
@@ -5520,6 +5812,12 @@ bpf_object__create_maps(struct bpf_object *obj)
memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz);
zfree(&obj->arena_data);
}
+ } else if (map->def.type == BPF_MAP_TYPE_INSN_SET) {
+ if (map->map_extra & BPF_F_STATIC_KEY) {
+ err = bpf_object__init_static_key_map(obj, map);
+ if (err < 0)
+ goto err_out;
+ }
}
if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
err = init_map_in_map_slots(obj, map);
@@ -6344,10 +6642,43 @@ static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, si
sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
}
+static int append_subprog_static_keys(struct bpf_program *main_prog,
+ struct bpf_program *subprog)
+{
+ size_t main_size = main_prog->static_keys_cnt * sizeof(struct static_key);
+ size_t subprog_size = subprog->static_keys_cnt * sizeof(struct static_key);
+ struct static_key *key;
+ void *new_keys;
+ int i, j;
+
+ if (!subprog->static_keys_cnt)
+ return 0;
+
+ new_keys = realloc(main_prog->static_keys, subprog_size + main_size);
+ if (!new_keys)
+ return -ENOMEM;
+
+ memcpy(new_keys + main_size, subprog->static_keys, subprog_size);
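+	/* Note: this is a shallow copy; the insns arrays stay shared with
+	 * the subprog, so the offset fixup below also rewrites the
+	 * subprog's own entries.
+	 */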
+
+ for (i = 0; i < subprog->static_keys_cnt; i++) {
+ key = new_keys + main_size + i * sizeof(struct static_key);
+ for (j = 0; j < key->insns_cnt; j++) {
+ key->insns[j].insn_offset += subprog->sub_insn_off;
+ key->insns[j].jump_target += subprog->sub_insn_off;
+ }
+ }
+
+ main_prog->static_keys = new_keys;
+ main_prog->static_keys_cnt += subprog->static_keys_cnt;
+
+ return 0;
+}
+
static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
{
int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
struct reloc_desc *relos;
+ int err;
int i;
if (main_prog == subprog)
@@ -6370,6 +6701,11 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra
*/
main_prog->reloc_desc = relos;
main_prog->nr_reloc = new_cnt;
+
+ err = append_subprog_static_keys(main_prog, subprog);
+ if (err)
+ return err;
+
return 0;
}
@@ -7337,6 +7673,8 @@ static int bpf_object__collect_relos(struct bpf_object *obj)
err = bpf_object__collect_st_ops_relos(obj, shdr, data);
else if (idx == obj->efile.btf_maps_shndx)
err = bpf_object__collect_map_relos(obj, shdr, data);
+ else if (idx == obj->efile.static_keys_data_shndx)
+ err = bpf_object__collect_static_keys_relos(obj, shdr, data);
else
err = bpf_object__collect_prog_relos(obj, shdr, data);
if (err)
@@ -7461,6 +7799,7 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog,
opts->attach_btf_obj_fd = btf_obj_fd;
opts->attach_btf_id = btf_type_id;
}
+
return 0;
}
@@ -7551,6 +7890,27 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog
return 0;
}
+	if (prog->static_keys_cnt) {
+		int i, fd, *fd_array;
+
+		/* static keys need a private fd_array; see the commit message */
+		if (obj->fd_array_cnt) {
+			pr_warn("not supported: static keys with a pre-existing fd_array\n");
+			return -ENOTSUP;
+		}
+
+		fd_array = calloc(prog->static_keys_cnt, sizeof(*fd_array));
+		if (!fd_array)
+			return -ENOMEM;
+
+		for (i = 0; i < prog->static_keys_cnt; i++) {
+			fd = prog->static_keys[i].map->fd;
+			if (fd < 0) {
+				free(fd_array);
+				return -EINVAL;
+			}
+			fd_array[i] = fd;
+		}
+
+		load_attr.fd_array = fd_array;
+		load_attr.fd_array_cnt = prog->static_keys_cnt;
+	}
+
retry_load:
/* if log_level is zero, we don't request logs initially even if
* custom log_buf is specified; if the program load fails, then we'll
@@ -56,6 +56,9 @@
#ifndef R_BPF_64_ABS32
#define R_BPF_64_ABS32 3
#endif
+#ifndef R_BPF_64_NODYLD32
+#define R_BPF_64_NODYLD32 4
+#endif
#ifndef R_BPF_64_32
#define R_BPF_64_32 10
#endif
@@ -28,6 +28,7 @@
#include "str_error.h"
#define BTF_EXTERN_SEC ".extern"
+#define STATIC_KEYS_REL_SEC ".rel.static_keys"
struct src_sec {
const char *sec_name;
@@ -1037,7 +1038,8 @@ static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *se
size_t sym_type = ELF64_R_TYPE(relo->r_info);
if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32 &&
- sym_type != R_BPF_64_ABS64 && sym_type != R_BPF_64_ABS32) {
+ sym_type != R_BPF_64_ABS64 && sym_type != R_BPF_64_ABS32 &&
+ sym_type != R_BPF_64_NODYLD32 && strcmp(sec->sec_name, STATIC_KEYS_REL_SEC)) {
pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
i, sec->sec_idx, sym_type, obj->filename);
return -EINVAL;
@@ -2272,7 +2274,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
insn->imm += sec->dst_off / sizeof(struct bpf_insn);
else
insn->imm += sec->dst_off;
- } else {
+ } else if (strcmp(src_sec->sec_name, STATIC_KEYS_REL_SEC)) {
pr_warn("relocation against STT_SECTION in non-exec section is not supported!\n");
return -EINVAL;
}
Introduce the DEFINE_STATIC_KEY() and bpf_static_branch_{unlikely,likely}
macros to mimic the Linux Kernel Static Keys API in BPF. Example of usage
would be as follows:

	DEFINE_STATIC_KEY(key);

	void prog(void)
	{
		if (bpf_static_branch_unlikely(&key))
			/* rarely used code */
		else
			/* default hot path code */
	}

or, using the likely variant:

	void prog(void)
	{
		if (bpf_static_branch_likely(&key))
			/* default hot path code */
		else
			/* rarely used code */
	}

The "unlikely" version of the macro compiles to code in which the
else-branch (key is off) is the fall-through path; the "likely" macro
makes the if-branch the fall-through path.

Both macros push an entry to the new ".static_keys" section, which
contains the following information:

	          32 bits                   32 bits          64 bits
	offset of jump instruction | offset of jump target |  flags

The corresponding ".rel.static_keys" relocation table entry contains the
static key name. This information is enough to construct the
corresponding INSN_SET maps.

NOTE. This is an RFC version of the patch. The main open design question
is what to do when a static key is used in a noinline function and/or in
two BPF programs. Consider the following example:

	DEFINE_STATIC_KEY(key);

	static __noinline int foo(void)
	{
		if (bpf_static_branch_unlikely(&key)) {
			/* do something special */
		}
		...
	}

	SEC("xdp")
	int prog1(struct xdp_md *ctx)
	{
		foo();
		...
	}

	SEC("xdp")
	int prog2(struct xdp_md *ctx)
	{
		foo();
		...
	}

The problem here is that when such an ELF object is parsed and loaded by
libbpf, then, from the kernel's point of view, two programs are loaded:
prog1 + a copy of "foo", then prog2 + a copy of "foo". However, the
static key "key" can only be used in one program (and, of course, it will
point to different instructions in the two cases, as prog1/prog2 have
different sizes, plus there might be more relocations).

The solution is to create private copies of the key "key" per "load
object". This automatically allows reusing the "same" static key in
multiple programs.

From the uAPI perspective, the bpf_static_key_update() system call only
operates on one particular "atomic" object -- a map representing the
static key. However, there should be a way to toggle all the keys derived
from "key" (this looks more natural to a user, as from the C perspective
there is only one object). So, the following changes to the API should be
made:

  * when libbpf opens an object, it replaces "key" with private per-prog
    instances "prog1_key", "prog2_key", etc. These static keys can then
    be set individually via the bpf syscall

  * for the "wrapper API", either introduce a helper which takes a
    skeleton and a key name, or just generate a helper in the generated
    skeleton (does this introduce new API as well?)

Some other known bugs are included in this patch (until the libbpf API is
settled, it would be painful to keep re-implementing this patch +
selftests). One obvious bug is that gen-loader is not supported. Another
one is related to fd_array: in order to pass static keys on load, they
should be placed in fd_array, and fd_array_cnt must be set. The current
code in libbpf creates an fd_array which is shared between all the
programs in the ELF object, which doesn't work if fd_array_cnt is set to
non-zero, as all maps/btfs in fd_array[0,...,fd_array_cnt-1] are then
bound to the program. So instead, the loader should create a private copy
of fd_array per bpf_prog_load() call.
Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
---
 tools/lib/bpf/bpf_helpers.h     |  55 +++++
 tools/lib/bpf/libbpf.c          | 362 +++++++++++++++++++++++++++++++-
 tools/lib/bpf/libbpf_internal.h |   3 +
 tools/lib/bpf/linker.c          |   6 +-
 4 files changed, 423 insertions(+), 3 deletions(-)