Message ID | 20250210135129.719119-2-mykyta.yatsenko5@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | selftests/bpf: implement setting global variables in veristat | expand |
On Mon, Feb 10, 2025 at 5:51 AM Mykyta Yatsenko <mykyta.yatsenko5@gmail.com> wrote: > > From: Mykyta Yatsenko <yatsenko@meta.com> > > To better verify some complex BPF programs we'd like to preset global > variables. > This patch introduces CLI argument `--set-global-vars` or `-G` to > veristat, that allows presetting values to global variables defined > in BPF program. For example: > > prog.c: > ``` > enum Enum { ELEMENT1 = 0, ELEMENT2 = 5 }; > const volatile __s64 a = 5; > const volatile __u8 b = 5; > const volatile enum Enum c = ELEMENT2; > const volatile bool d = false; > > char arr[4] = {0}; > > SEC("tp_btf/sched_switch") > int BPF_PROG(...) > { > bpf_printk("%c\n", arr[a]); > bpf_printk("%c\n", arr[b]); > bpf_printk("%c\n", arr[c]); > bpf_printk("%c\n", arr[d]); > return 0; > } > ``` > By default verification of the program fails: > ``` > ./veristat prog.bpf.o > ``` > By presetting global variables, we can make verification pass: > ``` > ./veristat wq.bpf.o -G "a = 0" -G "b = 1" -G "c = 2" -G "d = 3" > ``` > > Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com> > --- > tools/testing/selftests/bpf/veristat.c | 319 ++++++++++++++++++++++++- > 1 file changed, 307 insertions(+), 12 deletions(-) > > diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c > index 06af5029885b..b4521ebb6e6a 100644 > --- a/tools/testing/selftests/bpf/veristat.c > +++ b/tools/testing/selftests/bpf/veristat.c > @@ -154,6 +154,15 @@ struct filter { > bool abs; > }; > > +struct var_preset { > + char *name; > + enum { INTEGRAL, NAME } type; > + union { > + long long ivalue; > + char *svalue; > + }; > +}; > + > static struct env { > char **filenames; > int filename_cnt; > @@ -195,6 +204,8 @@ static struct env { > int progs_processed; > int progs_skipped; > int top_src_lines; > + struct var_preset *presets; > + int npresets; > } env; > > static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) > @@ -246,12 +257,16 @@ static const struct argp_option opts[] = { > { "test-reg-invariants", 'r', NULL, 0, > "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, > { "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" }, > + { "set-global-vars", 'G', "GLOBALS", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" }, > {}, > }; > > static int parse_stats(const char *stats_str, struct stat_specs *specs); > static int append_filter(struct filter **filters, int *cnt, const char *str); > static int append_filter_file(const char *path); > +static int parse_var_presets(char *expr, struct var_preset **presets, int *capacity, int *size); > +static int parse_var_presets_from_file(const char *filename, struct var_preset **presets, > + int *capacity, int *size); nit: append_filter vs append_filter_file would imply this should be parse_var_presets vs parse_var_presets_file, no? > > static error_t parse_arg(int key, char *arg, struct argp_state *state) > { > @@ -363,6 +378,24 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) > return -ENOMEM; > env.filename_cnt++; > break; > + case 'G': { > + static int presets_cap; > + char *expr = strdup(arg); > + > + if (expr[0] == '@') { > + if (parse_var_presets_from_file(expr + 1, &env.presets, > + &presets_cap, &env.npresets)) { > + fprintf(stderr, "Could not parse global variables preset: %s\n", > + arg); > + argp_usage(state); > + } > + } else if (parse_var_presets(expr, &env.presets, &presets_cap, &env.npresets)) { > + fprintf(stderr, "Could not parse global variables preset: %s\n", arg); > + argp_usage(state); > + } > + free(expr); > + break; > + } Can you please follow the append_filter pattern here? Consistency is good. I don't think we want presets_cap and expr here as well. Don't micro-optimize, it's ok to call realloc() for each entry for non-performance critical code. Internally libc won't really reallocate every single time. > default: > return ARGP_ERR_UNKNOWN; > } > @@ -1292,6 +1325,273 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf > return 0; > }; > > +static int parse_var_presets(char *expr, struct var_preset **presets, int *capacity, int *size) > +{ > + char *eq_ptr = strchr(expr, '='); > + char *name_ptr = expr; > + char *name_end = eq_ptr - 1; > + char *val_ptr = eq_ptr + 1; > + long long value; > + > + if (!eq_ptr) { > + fprintf(stderr, "No assignment in expression\n"); > + return -EINVAL; > + } > + > + while (isspace(*name_ptr)) > + ++name_ptr; > + while (isspace(*name_end)) > + --name_end; > + while (isspace(*val_ptr)) > + ++val_ptr; here's a pro tip: scanf() is pretty useful and powerful parser for simple stuff. scanf(" %s = %[^\n]\n") will trim spaces around variable name and equality and will capture the rest of string. Or if you do " %s = %s\n" it will trim spaces and do the right thing as long as we don't expect whitespace to be valid value (we can start there for now, because it's true for integers and enums) > + > + if (name_ptr > name_end) { > + fprintf(stderr, "Empty variable name in expression %s\n", expr); > + return -EINVAL; > + } > + > + if (*size >= *capacity) { > + *capacity = max(*capacity * 2, 1); > + *presets = realloc(*presets, *capacity * sizeof(**presets)); > + } > + as I mentioned above, don't bother optimizing and keeping track of capacity, just realloc() every single time, it's fine > + if (isalpha(*val_ptr)) { > + char *value_end = val_ptr + strlen(val_ptr) - 1; > + > + while (isspace(*value_end)) > + --value_end; > + *(value_end + 1) = '\0'; > + > + (*presets)[*size].svalue = strdup(val_ptr); > + (*presets)[*size].type = NAME; > + } else if (*val_ptr == '-' || isdigit(*val_ptr)) { > + errno = 0; > + value = strtoll(val_ptr, NULL, 0); > + if (errno == ERANGE) { > + errno = 0; > + value = strtoull(val_ptr, NULL, 0); > + } > + (*presets)[*size].ivalue = value; > + (*presets)[*size].type = INTEGRAL; > + if (errno) { > + fprintf(stderr, "Could not parse integer value %s\n", val_ptr); > + return -EINVAL; > + } > + } else { > + fprintf(stderr, "Could not parse value %s\n", val_ptr); > + return -EINVAL; > + } > + *(name_end + 1) = '\0'; > + (*presets)[*size].name = strdup(name_ptr); > + (*size)++; ... maybe let's do something simpler and dumber? Try to parse provided string as integer, if it succeeds (and consumes entire string) -- great, if not -- assume it's enum or true/false (if you support that)? btw, see scanf()'s %i modifier, it can parse both hex and dec numbers. You can just try with '-' and without. Basically, let's keep it straightforward, even if it's, technically speaking, suboptimal performance-wise. > + return 0; > +} > + > +static int parse_var_presets_from_file(const char *filename, struct var_preset **presets, > + int *capacity, int *size) > +{ > + FILE *f; > + char line[256]; > + int err = 0; > + > + f = fopen(filename, "rt"); > + if (!f) { > + fprintf(stderr, "Could not open file %s\n", filename); > + return -EINVAL; > + } > + > + while (fgets(line, sizeof(line), f)) { > + int err = parse_var_presets(line, presets, capacity, size); > + > + if (err) > + goto cleanup; > + } > + > +cleanup: > + fclose(f); > + return err; > +} see append_filter_file(), we do similar stuff there, keep it consistent > + > +static bool is_signed_type(const struct btf_type *t) > +{ > + if (btf_is_int(t)) > + return btf_int_encoding(t) & BTF_INT_SIGNED; > + if (btf_is_enum(t)) > + return btf_kflag(t); there is also enum64, use btf_is_any_enum() > + return true; > +} > + > +static int enum_value_from_name(const struct btf *btf, const struct btf_type *t, > + const char *evalue, long long *retval) > +{ > + if (btf_is_enum(t)) { > + struct btf_enum *e = btf_enum(t); > + int i, n = btf_vlen(t); > + > + for (i = 0; i < n; ++i) { > + const char *cur_name = btf__name_by_offset(btf, e[i].name_off); > + > + if (strcmp(cur_name, evalue) == 0) { > + *retval = e[i].val; > + return 0; > + } > + } > + } else if (btf_is_enum64(t)) { > + struct btf_enum64 *e = btf_enum64(t); > + int i, n = btf_vlen(t); > + > + for (i = 0; i < n; ++i) { > + struct btf_enum64 *cur = e + i; > + const char *cur_name = btf__name_by_offset(btf, cur->name_off); you have two conceptually identical loops, but in one you do `cur = e + i` and in another you do `e[i]` access... why? > + __u64 value = btf_enum64_value(cur); > + > + if (strcmp(cur_name, evalue) == 0) { > + *retval = value; > + return 0; > + } > + } > + } > + return -EINVAL; > +} > + > +static bool is_preset_supported(const struct btf_type *t) > +{ > + return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t); > +} > + > +static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct btf_type *t, > + struct bpf_map *map, struct btf_var_secinfo *sinfo, > + struct var_preset *preset) > +{ > + const struct btf_type *base_type; > + void *ptr; > + size_t size; > + > + base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); > + if (!base_type) { > + fprintf(stderr, "Could not resolve type %d\n", t->type); > + return -EINVAL; > + } > + if (!is_preset_supported(base_type)) { > + fprintf(stderr, "Setting global variable for btf kind %d is not supported\n", > + btf_kind(base_type)); > + return -EINVAL; > + } > + > + if (preset->type == NAME && btf_is_any_enum(base_type)) { > + /* Convert enum element name into integer */ > + long long ivalue; > + > + if (enum_value_from_name(btf, base_type, preset->svalue, &ivalue) != 0) { > + fprintf(stderr, "Could not find integer value for enum element %s\n", > + preset->svalue); > + return -EINVAL; > + } > + free(preset->svalue); > + preset->ivalue = ivalue; > + preset->type = INTEGRAL; but for different object file this value can change? You can cache for the same BTF, but once BTF changes, you'll have to recalculate it (I'd keep it simple and look it up every single time, for now) > + } > + > + /* Check if value fits into the target variable size */ > + if (sinfo->size < sizeof(preset->ivalue)) { > + bool is_signed = is_signed_type(base_type); > + __u32 unsigned_bits = sinfo->size * 8 - (is_signed ? 1 : 0); > + long long max_val = 1ll << unsigned_bits; what about u64? 1 << 64 ? > + > + if (preset->ivalue >= max_val || preset->ivalue < -max_val) { > + fprintf(stderr, > + "Variable %s value %lld is out of range [%lld; %lld]\n", > + btf__name_by_offset(btf, t->name_off), preset->ivalue, > + is_signed ? -max_val : 0, max_val - 1); > + return -EINVAL; > + } > + } > + > + ptr = (void *)bpf_map__initial_value(map, &size); > + if (!ptr || (sinfo->offset + sinfo->size > size)) > + return -EINVAL; > + > + if (__BYTE_ORDER == __LITTLE_ENDIAN) { > + memcpy(ptr + sinfo->offset, &preset->ivalue, sinfo->size); > + } else if (__BYTE_ORDER == __BIG_ENDIAN) { > + __u8 src_offset = sizeof(preset->ivalue) - sinfo->size; > + > + memcpy(ptr + sinfo->offset, (void *)&preset->ivalue + src_offset, sinfo->size); > + } > + return 0; > +} > + > +static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, int npresets) > +{ > + struct btf_var_secinfo *sinfo; > + const char *sec_name; > + const struct btf_type *type; > + struct bpf_map *map; > + struct btf *btf; > + bool *set_var; > + int i, j, k, n, cnt, err = 0; > + > + if (npresets == 0) > + return 0; > + > + btf = bpf_object__btf(obj); > + if (!btf) > + return -EINVAL; > + > + set_var = calloc(npresets, sizeof(bool)); > + for (i = 0; i < npresets; ++i) > + set_var[i] = false; calloc() is zero-initializing, no need to set to false > + > + cnt = btf__type_cnt(btf); > + for (i = 0; i != cnt; ++i) { double space > + type = btf__type_by_id(btf, i); nit: type -> t, we use that convention when working with BTF types quite consistently (btw, zero is always VOID, so you can always skip it with `i = 1`) > + > + if (!btf_is_datasec(type)) > + continue; > + > + sinfo = btf_var_secinfos(type); > + sec_name = btf__name_by_offset(btf, type->name_off); > + map = bpf_object__find_map_by_name(obj, sec_name); > + if (!map) > + continue; > + > + n = btf_vlen(type); > + for (j = 0; j < n; ++j, ++sinfo) { > + const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type); > + const char *var_name = btf__name_by_offset(btf, var_type->name_off); it's kind of bad style, IMO, to look something up for var_type->name_off before you are sure it's what you care about (btf_is_var()), move assignment to after the if? > + > + if (!btf_is_var(var_type)) > + continue; > + > + for (k = 0; k < npresets; ++k) { > + if (strcmp(var_name, presets[k].name) != 0) > + continue; > + > + if (set_var[k]) { maybe just have an extra counter in preset itself, which you can clear between BPF program loads? Less trouble with extra dynamic memory allocation > + fprintf(stderr, "Variable %s is set more than once", > + var_name); I'd error out in this case, tbh (it's either user error or static global variables, which I'm sure is unintentional in 99% of cases) > + } > + > + err = set_global_var(obj, btf, var_type, map, sinfo, presets + k); > + if (err) > + goto out; > + > + set_var[k] = true; > + break; > + } > + } > + } > + for (i = 0; i < npresets; ++i) { > + if (!set_var[i]) { > + fprintf(stderr, "Global variable preset %s has not been applied\n", > + presets[i].name); > + } > + } > +out: > + free(set_var); > + return err; > +} > + > static int process_obj(const char *filename) > { > const char *base_filename = basename(strdupa(filename)); > @@ -1299,7 +1599,7 @@ static int process_obj(const char *filename) > struct bpf_program *prog, *tprog, *lprog; > libbpf_print_fn_t old_libbpf_print_fn; > LIBBPF_OPTS(bpf_object_open_opts, opts); > - int err = 0, prog_cnt = 0; > + int err = 0; > > if (!should_process_file_prog(base_filename, NULL)) { > if (env.verbose) > @@ -1334,17 +1634,6 @@ static int process_obj(const char *filename) > > env.files_processed++; > > - bpf_object__for_each_program(prog, obj) { > - prog_cnt++; > - } > - > - if (prog_cnt == 1) { > - prog = bpf_object__next_program(obj, NULL); > - bpf_program__set_autoload(prog, true); > - process_prog(filename, obj, prog); > - goto cleanup; > - } > - I think this was an optimization to avoid a heavy-weight ELF parsing twice, why would we want to remove it?.. pw-bot: cr > bpf_object__for_each_program(prog, obj) { > const char *prog_name = bpf_program__name(prog); > > @@ -1355,6 +1644,12 @@ static int process_obj(const char *filename) > goto cleanup; > } > > + err = set_global_vars(tobj, env.presets, env.npresets); > + if (err) { > + fprintf(stderr, "Failed to set global variables\n"); > + goto cleanup; > + } > + > lprog = NULL; > bpf_object__for_each_program(tprog, tobj) { > const char *tprog_name = bpf_program__name(tprog); > -- > 2.48.1 >
On Mon, 2025-02-10 at 13:51 +0000, Mykyta Yatsenko wrote: [...] > @@ -363,6 +378,24 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) > return -ENOMEM; > env.filename_cnt++; > break; > + case 'G': { > + static int presets_cap; > + char *expr = strdup(arg); > + > + if (expr[0] == '@') { > + if (parse_var_presets_from_file(expr + 1, &env.presets, > + &presets_cap, &env.npresets)) { Nit: I'd modify 'env' directly in parse_var_presets{,_from_file} and add presets_cap field to 'env': to avoid static variable and to avoid '(*presets)[*size].name = ...' below. > + fprintf(stderr, "Could not parse global variables preset: %s\n", > + arg); > + argp_usage(state); > + } > + } else if (parse_var_presets(expr, &env.presets, &presets_cap, &env.npresets)) { > + fprintf(stderr, "Could not parse global variables preset: %s\n", arg); > + argp_usage(state); > + } > + free(expr); > + break; > + } > default: > return ARGP_ERR_UNKNOWN; > } > @@ -1292,6 +1325,273 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf > return 0; > }; > > +static int parse_var_presets(char *expr, struct var_preset **presets, int *capacity, int *size) > +{ > + char *eq_ptr = strchr(expr, '='); > + char *name_ptr = expr; > + char *name_end = eq_ptr - 1; > + char *val_ptr = eq_ptr + 1; > + long long value; > + > + if (!eq_ptr) { > + fprintf(stderr, "No assignment in expression\n"); > + return -EINVAL; > + } > + > + while (isspace(*name_ptr)) > + ++name_ptr; > + while (isspace(*name_end)) > + --name_end; I think this loop has to be capped by string start check, otherwise for -G ' = 10' it might read some uninitialized memory. > + while (isspace(*val_ptr)) > + ++val_ptr; > + > + if (name_ptr > name_end) { > + fprintf(stderr, "Empty variable name in expression %s\n", expr); > + return -EINVAL; > + } > + > + if (*size >= *capacity) { > + *capacity = max(*capacity * 2, 1); > + *presets = realloc(*presets, *capacity * sizeof(**presets)); Nit: if realloc() fails it returns NULL, so the pointer to older *presets would be lost and never freed (but we exit the program in case of an error, so not really an issue). Also, check for NULL and return -ENOMEM? > + } > + > + if (isalpha(*val_ptr)) { > + char *value_end = val_ptr + strlen(val_ptr) - 1; > + > + while (isspace(*value_end)) > + --value_end; > + *(value_end + 1) = '\0'; > + > + (*presets)[*size].svalue = strdup(val_ptr); Silly question, why strdup here and for .name? Keeping pointers to argv should be fine as far as I know. > + (*presets)[*size].type = NAME; > + } else if (*val_ptr == '-' || isdigit(*val_ptr)) { > + errno = 0; > + value = strtoll(val_ptr, NULL, 0); > + if (errno == ERANGE) { > + errno = 0; > + value = strtoull(val_ptr, NULL, 0); > + } > + (*presets)[*size].ivalue = value; > + (*presets)[*size].type = INTEGRAL; > + if (errno) { > + fprintf(stderr, "Could not parse integer value %s\n", val_ptr); > + return -EINVAL; > + } > + } else { > + fprintf(stderr, "Could not parse value %s\n", val_ptr); > + return -EINVAL; > + } > + *(name_end + 1) = '\0'; > + (*presets)[*size].name = strdup(name_ptr); > + (*size)++; > + return 0; > +} > + > +static int parse_var_presets_from_file(const char *filename, struct var_preset **presets, > + int *capacity, int *size) Thank you for adding this! > +{ > + FILE *f; > + char line[256]; > + int err = 0; > + > + f = fopen(filename, "rt"); > + if (!f) { > + fprintf(stderr, "Could not open file %s\n", filename); > + return -EINVAL; > + } > + > + while (fgets(line, sizeof(line), f)) { > + int err = parse_var_presets(line, presets, capacity, size); > + > + if (err) > + goto cleanup; > + } > + > +cleanup: Nit: I'd check for ferror(f) and write something to stderr here. > + fclose(f); > + return err; > +} > + > +static bool is_signed_type(const struct btf_type *t) > +{ > + if (btf_is_int(t)) > + return btf_int_encoding(t) & BTF_INT_SIGNED; > + if (btf_is_enum(t)) > + return btf_kflag(t); > + return true; > +} [...] > +static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, int npresets) > +{ > + struct btf_var_secinfo *sinfo; > + const char *sec_name; > + const struct btf_type *type; > + struct bpf_map *map; > + struct btf *btf; > + bool *set_var; > + int i, j, k, n, cnt, err = 0; > + > + if (npresets == 0) > + return 0; > + > + btf = bpf_object__btf(obj); > + if (!btf) > + return -EINVAL; > + > + set_var = calloc(npresets, sizeof(bool)); > + for (i = 0; i < npresets; ++i) > + set_var[i] = false; As Andrii writes in a sibling thread, I'd keep this flag in the 'struct var_preset'. > + > + cnt = btf__type_cnt(btf); > + for (i = 0; i != cnt; ++i) { > + type = btf__type_by_id(btf, i); > + > + if (!btf_is_datasec(type)) > + continue; > + > + sinfo = btf_var_secinfos(type); > + sec_name = btf__name_by_offset(btf, type->name_off); > + map = bpf_object__find_map_by_name(obj, sec_name); > + if (!map) > + continue; > + > + n = btf_vlen(type); > + for (j = 0; j < n; ++j, ++sinfo) { > + const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type); > + const char *var_name = btf__name_by_offset(btf, var_type->name_off); > + > + if (!btf_is_var(var_type)) > + continue; > + > + for (k = 0; k < npresets; ++k) { > + if (strcmp(var_name, presets[k].name) != 0) > + continue; > + > + if (set_var[k]) { > + fprintf(stderr, "Variable %s is set more than once", > + var_name); > + } > + > + err = set_global_var(obj, btf, var_type, map, sinfo, presets + k); > + if (err) > + goto out; > + > + set_var[k] = true; > + break; > + } > + } > + } > + for (i = 0; i < npresets; ++i) { > + if (!set_var[i]) { > + fprintf(stderr, "Global variable preset %s has not been applied\n", > + presets[i].name); > + } > + } > +out: > + free(set_var); > + return err; > +} > + [...]
On Mon, 2025-02-10 at 17:13 -0800, Andrii Nakryiko wrote: [...] > > @@ -1334,17 +1634,6 @@ static int process_obj(const char *filename) > > > > env.files_processed++; > > > > - bpf_object__for_each_program(prog, obj) { > > - prog_cnt++; > > - } > > - > > - if (prog_cnt == 1) { > > - prog = bpf_object__next_program(obj, NULL); > > - bpf_program__set_autoload(prog, true); > > - process_prog(filename, obj, prog); > > - goto cleanup; > > - } > > - > > I think this was an optimization to avoid a heavy-weight ELF parsing > twice, why would we want to remove it?.. > > pw-bot: cr The v1 of this patch missed the case that globals have to be set in both cases, when prog_cnt == 1 and prog_cnt != 1. I remember making same mistake when debugging something unrelated. Hence I suggested removing this special case. > > bpf_object__for_each_program(prog, obj) { > > const char *prog_name = bpf_program__name(prog); > >
On 11/02/2025 01:13, Andrii Nakryiko wrote: > On Mon, Feb 10, 2025 at 5:51 AM Mykyta Yatsenko > <mykyta.yatsenko5@gmail.com> wrote: >> From: Mykyta Yatsenko <yatsenko@meta.com> >> >> To better verify some complex BPF programs we'd like to preset global >> variables. >> This patch introduces CLI argument `--set-global-vars` or `-G` to >> veristat, that allows presetting values to global variables defined >> in BPF program. For example: >> >> prog.c: >> ``` >> enum Enum { ELEMENT1 = 0, ELEMENT2 = 5 }; >> const volatile __s64 a = 5; >> const volatile __u8 b = 5; >> const volatile enum Enum c = ELEMENT2; >> const volatile bool d = false; >> >> char arr[4] = {0}; >> >> SEC("tp_btf/sched_switch") >> int BPF_PROG(...) >> { >> bpf_printk("%c\n", arr[a]); >> bpf_printk("%c\n", arr[b]); >> bpf_printk("%c\n", arr[c]); >> bpf_printk("%c\n", arr[d]); >> return 0; >> } >> ``` >> By default verification of the program fails: >> ``` >> ./veristat prog.bpf.o >> ``` >> By presetting global variables, we can make verification pass: >> ``` >> ./veristat wq.bpf.o -G "a = 0" -G "b = 1" -G "c = 2" -G "d = 3" >> ``` >> >> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com> >> --- >> tools/testing/selftests/bpf/veristat.c | 319 ++++++++++++++++++++++++- >> 1 file changed, 307 insertions(+), 12 deletions(-) >> >> diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c >> index 06af5029885b..b4521ebb6e6a 100644 >> --- a/tools/testing/selftests/bpf/veristat.c >> +++ b/tools/testing/selftests/bpf/veristat.c >> @@ -154,6 +154,15 @@ struct filter { >> bool abs; >> }; >> >> +struct var_preset { >> + char *name; >> + enum { INTEGRAL, NAME } type; >> + union { >> + long long ivalue; >> + char *svalue; >> + }; >> +}; >> + >> static struct env { >> char **filenames; >> int filename_cnt; >> @@ -195,6 +204,8 @@ static struct env { >> int progs_processed; >> int progs_skipped; >> int top_src_lines; >> + struct var_preset *presets; >> + int npresets; >> } env; >> >> static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) >> @@ -246,12 +257,16 @@ static const struct argp_option opts[] = { >> { "test-reg-invariants", 'r', NULL, 0, >> "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, >> { "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" }, >> + { "set-global-vars", 'G', "GLOBALS", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" }, >> {}, >> }; >> >> static int parse_stats(const char *stats_str, struct stat_specs *specs); >> static int append_filter(struct filter **filters, int *cnt, const char *str); >> static int append_filter_file(const char *path); >> +static int parse_var_presets(char *expr, struct var_preset **presets, int *capacity, int *size); >> +static int parse_var_presets_from_file(const char *filename, struct var_preset **presets, >> + int *capacity, int *size); > nit: append_filter vs append_filter_file would imply this should be > parse_var_presets vs parse_var_presets_file, no? Sure, makes sense. >> static error_t parse_arg(int key, char *arg, struct argp_state *state) >> { >> @@ -363,6 +378,24 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) >> return -ENOMEM; >> env.filename_cnt++; >> break; >> + case 'G': { >> + static int presets_cap; >> + char *expr = strdup(arg); >> + >> + if (expr[0] == '@') { >> + if (parse_var_presets_from_file(expr + 1, &env.presets, >> + &presets_cap, &env.npresets)) { >> + fprintf(stderr, "Could not parse global variables preset: %s\n", >> + arg); >> + argp_usage(state); >> + } >> + } else if (parse_var_presets(expr, &env.presets, &presets_cap, &env.npresets)) { >> + fprintf(stderr, "Could not parse global variables preset: %s\n", arg); >> + argp_usage(state); >> + } >> + free(expr); >> + break; >> + } > Can you please follow the append_filter pattern here? Consistency is > good. I don't think we want presets_cap and expr here as well. Don't > micro-optimize, it's ok to call realloc() for each entry for > non-performance critical code. Internally libc won't really reallocate > every single time. > >> default: >> return ARGP_ERR_UNKNOWN; >> } >> @@ -1292,6 +1325,273 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf >> return 0; >> }; >> >> +static int parse_var_presets(char *expr, struct var_preset **presets, int *capacity, int *size) >> +{ >> + char *eq_ptr = strchr(expr, '='); >> + char *name_ptr = expr; >> + char *name_end = eq_ptr - 1; >> + char *val_ptr = eq_ptr + 1; >> + long long value; >> + >> + if (!eq_ptr) { >> + fprintf(stderr, "No assignment in expression\n"); >> + return -EINVAL; >> + } >> + >> + while (isspace(*name_ptr)) >> + ++name_ptr; >> + while (isspace(*name_end)) >> + --name_end; >> + while (isspace(*val_ptr)) >> + ++val_ptr; > here's a pro tip: scanf() is pretty useful and powerful parser for > simple stuff. scanf(" %s = %[^\n]\n") will trim spaces around variable > name and equality and will capture the rest of string. Or if you do " > %s = %s\n" it will trim spaces and do the right thing as long as we > don't expect whitespace to be valid value (we can start there for now, > because it's true for integers and enums) > >> + >> + if (name_ptr > name_end) { >> + fprintf(stderr, "Empty variable name in expression %s\n", expr); >> + return -EINVAL; >> + } >> + >> + if (*size >= *capacity) { >> + *capacity = max(*capacity * 2, 1); >> + *presets = realloc(*presets, *capacity * sizeof(**presets)); >> + } >> + > as I mentioned above, don't bother optimizing and keeping track of > capacity, just realloc() every single time, it's fine > >> + if (isalpha(*val_ptr)) { >> + char *value_end = val_ptr + strlen(val_ptr) - 1; >> + >> + while (isspace(*value_end)) >> + --value_end; >> + *(value_end + 1) = '\0'; >> + >> + (*presets)[*size].svalue = strdup(val_ptr); >> + (*presets)[*size].type = NAME; >> + } else if (*val_ptr == '-' || isdigit(*val_ptr)) { >> + errno = 0; >> + value = strtoll(val_ptr, NULL, 0); >> + if (errno == ERANGE) { >> + errno = 0; >> + value = strtoull(val_ptr, NULL, 0); >> + } >> + (*presets)[*size].ivalue = value; >> + (*presets)[*size].type = INTEGRAL; >> + if (errno) { >> + fprintf(stderr, "Could not parse integer value %s\n", val_ptr); >> + return -EINVAL; >> + } >> + } else { >> + fprintf(stderr, "Could not parse value %s\n", val_ptr); >> + return -EINVAL; >> + } >> + *(name_end + 1) = '\0'; >> + (*presets)[*size].name = strdup(name_ptr); >> + (*size)++; > ... maybe let's do something simpler and dumber? Try to parse provided > string as integer, if it succeeds (and consumes entire string) -- > great, if not -- assume it's enum or true/false (if you support that)? > > btw, see scanf()'s %i modifier, it can parse both hex and dec numbers. > You can just try with '-' and without. > > Basically, let's keep it straightforward, even if it's, technically > speaking, suboptimal performance-wise. > >> + return 0; >> +} >> + >> +static int parse_var_presets_from_file(const char *filename, struct var_preset **presets, >> + int *capacity, int *size) >> +{ >> + FILE *f; >> + char line[256]; >> + int err = 0; >> + >> + f = fopen(filename, "rt"); >> + if (!f) { >> + fprintf(stderr, "Could not open file %s\n", filename); >> + return -EINVAL; >> + } >> + >> + while (fgets(line, sizeof(line), f)) { >> + int err = parse_var_presets(line, presets, capacity, size); >> + >> + if (err) >> + goto cleanup; >> + } >> + >> +cleanup: >> + fclose(f); >> + return err; >> +} > see append_filter_file(), we do similar stuff there, keep it consistent > >> + >> +static bool is_signed_type(const struct btf_type *t) >> +{ >> + if (btf_is_int(t)) >> + return btf_int_encoding(t) & BTF_INT_SIGNED; >> + if (btf_is_enum(t)) >> + return btf_kflag(t); > there is also enum64, use btf_is_any_enum() > >> + return true; >> +} >> + >> +static int enum_value_from_name(const struct btf *btf, const struct btf_type *t, >> + const char *evalue, long long *retval) >> +{ >> + if (btf_is_enum(t)) { >> + struct btf_enum *e = btf_enum(t); >> + int i, n = btf_vlen(t); >> + >> + for (i = 0; i < n; ++i) { >> + const char *cur_name = btf__name_by_offset(btf, e[i].name_off); >> + >> + if (strcmp(cur_name, evalue) == 0) { >> + *retval = e[i].val; >> + return 0; >> + } >> + } >> + } else if (btf_is_enum64(t)) { >> + struct btf_enum64 *e = btf_enum64(t); >> + int i, n = btf_vlen(t); >> + >> + for (i = 0; i < n; ++i) { >> + struct btf_enum64 *cur = e + i; >> + const char *cur_name = btf__name_by_offset(btf, cur->name_off); > you have two conceptually identical loops, but in one you do `cur = e > + i` and in another you do `e[i]` access... why? The difference is that for e64 case we get value by the `btf_enum64_value` function, which accepts pointer to `btf_enum64`, I think it is a bit cleaner to have an explicit assignment `struct btf_enum64 *cur = e + i;`, instead of passing `&e[i]` into btf_enum64_value. Though, let's make both loops more consistent. >> + __u64 value = btf_enum64_value(cur); >> + >> + if (strcmp(cur_name, evalue) == 0) { >> + *retval = value; >> + return 0; >> + } >> + } >> + } >> + return -EINVAL; >> +} >> + >> +static bool is_preset_supported(const struct btf_type *t) >> +{ >> + return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t); >> +} >> + >> +static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct btf_type *t, >> + struct bpf_map *map, struct btf_var_secinfo *sinfo, >> + struct var_preset *preset) >> +{ >> + const struct btf_type *base_type; >> + void *ptr; >> + size_t size; >> + >> + base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); >> + if (!base_type) { >> + fprintf(stderr, "Could not resolve type %d\n", t->type); >> + return -EINVAL; >> + } >> + if (!is_preset_supported(base_type)) { >> + fprintf(stderr, "Setting global variable for btf kind %d is not supported\n", >> + btf_kind(base_type)); >> + return -EINVAL; >> + } >> + >> + if (preset->type == NAME && btf_is_any_enum(base_type)) { >> + /* Convert enum element name into integer */ >> + long long ivalue; >> + >> + if (enum_value_from_name(btf, base_type, preset->svalue, &ivalue) != 0) { >> + fprintf(stderr, "Could not find integer value for enum element %s\n", >> + preset->svalue); >> + return -EINVAL; >> + } >> + free(preset->svalue); >> + preset->ivalue = ivalue; >> + preset->type = INTEGRAL; > but for different object file this value can change? You can cache for > the same BTF, but once BTF changes, you'll have to recalculate it (I'd > keep it simple and look it up every single time, for now) > >> + } >> + >> + /* Check if value fits into the target variable size */ >> + if (sinfo->size < sizeof(preset->ivalue)) { >> + bool is_signed = is_signed_type(base_type); >> + __u32 unsigned_bits = sinfo->size * 8 - (is_signed ? 1 : 0); >> + long long max_val = 1ll << unsigned_bits; > what about u64? 1 << 64 ? This should not be executed for u64, check `if (sinfo->size < sizeof(preset->ivalue))` is there for that. > >> + >> + if (preset->ivalue >= max_val || preset->ivalue < -max_val) { >> + fprintf(stderr, >> + "Variable %s value %lld is out of range [%lld; %lld]\n", >> + btf__name_by_offset(btf, t->name_off), preset->ivalue, >> + is_signed ? -max_val : 0, max_val - 1); >> + return -EINVAL; >> + } >> + } >> + >> + ptr = (void *)bpf_map__initial_value(map, &size); >> + if (!ptr || (sinfo->offset + sinfo->size > size)) >> + return -EINVAL; >> + >> + if (__BYTE_ORDER == __LITTLE_ENDIAN) { >> + memcpy(ptr + sinfo->offset, &preset->ivalue, sinfo->size); >> + } else if (__BYTE_ORDER == __BIG_ENDIAN) { >> + __u8 src_offset = sizeof(preset->ivalue) - sinfo->size; >> + >> + memcpy(ptr + sinfo->offset, (void *)&preset->ivalue + src_offset, sinfo->size); >> + } >> + return 0; >> +} >> + >> +static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, int npresets) >> +{ >> + struct btf_var_secinfo *sinfo; >> + const char *sec_name; >> + const struct btf_type *type; >> + struct bpf_map *map; >> + struct btf *btf; >> + bool *set_var; >> + int i, j, k, n, cnt, err = 0; >> + >> + if (npresets == 0) >> + return 0; >> + >> + btf = bpf_object__btf(obj); >> + if (!btf) >> + return -EINVAL; >> + >> + set_var = calloc(npresets, sizeof(bool)); >> + for (i = 0; i < npresets; ++i) >> + set_var[i] = false; > calloc() is zero-initializing, no need to set to false > >> + >> + cnt = btf__type_cnt(btf); >> + for (i = 0; i != cnt; ++i) { > double space > >> + type = btf__type_by_id(btf, i); > nit: type -> t, we use that convention when working with BTF types > quite consistently (btw, zero is always VOID, so you can always skip > it with `i = 1`) > >> + >> + if (!btf_is_datasec(type)) >> + continue; >> + >> + sinfo = btf_var_secinfos(type); >> + sec_name = btf__name_by_offset(btf, type->name_off); >> + map = bpf_object__find_map_by_name(obj, sec_name); >> + if (!map) >> + continue; >> + >> + n = btf_vlen(type); >> + for (j = 0; j < n; ++j, ++sinfo) { >> + const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type); >> + const char *var_name = btf__name_by_offset(btf, var_type->name_off); > it's kind of bad style, IMO, to look something up for > var_type->name_off before you are sure it's what you care about > (btf_is_var()), move assignment to after the if? Agree. >> + >> + if (!btf_is_var(var_type)) >> + continue; >> + >> + for (k = 0; k < npresets; ++k) { >> + if (strcmp(var_name, presets[k].name) != 0) >> + continue; >> + >> + if (set_var[k]) { > maybe just have an extra counter in preset itself, which you can clear > between BPF program loads? Less trouble with extra dynamic memory > allocation > >> + fprintf(stderr, "Variable %s is set more than once", >> + var_name); > I'd error out in this case, tbh (it's either user error or static > global variables, which I'm sure is unintentional in 99% of cases) > >> + } >> + >> + err = set_global_var(obj, btf, var_type, map, sinfo, presets + k); >> + if (err) >> + goto out; >> + >> + set_var[k] = true; >> + break; >> + } >> + } >> + } >> + for (i = 0; i < npresets; ++i) { >> + if (!set_var[i]) { >> + fprintf(stderr, "Global variable preset %s has not been applied\n", >> + presets[i].name); >> + } >> + } >> +out: >> + free(set_var); >> + return err; >> +} >> + >> static int process_obj(const char *filename) >> { >> const char *base_filename = basename(strdupa(filename)); >> @@ -1299,7 +1599,7 @@ static int process_obj(const char *filename) >> struct bpf_program *prog, *tprog, *lprog; >> libbpf_print_fn_t old_libbpf_print_fn; >> LIBBPF_OPTS(bpf_object_open_opts, opts); >> - int err = 0, prog_cnt = 0; >> + int err = 0; >> >> if (!should_process_file_prog(base_filename, NULL)) { >> if (env.verbose) >> @@ -1334,17 +1634,6 @@ static int process_obj(const char *filename) >> >> env.files_processed++; >> >> - bpf_object__for_each_program(prog, obj) { >> - prog_cnt++; >> - } >> - >> - if (prog_cnt == 1) { >> - prog = bpf_object__next_program(obj, NULL); >> - bpf_program__set_autoload(prog, true); >> - process_prog(filename, obj, prog); >> - goto cleanup; >> - } >> - > I think this was an optimization to avoid a heavy-weight ELF parsing > twice, why would we want to remove it?.. Thanks for explaining, this looked like unnecessary code duplication, I'll revert it. > pw-bot: cr > >> bpf_object__for_each_program(prog, obj) { >> const char *prog_name = bpf_program__name(prog); >> >> @@ -1355,6 +1644,12 @@ static int process_obj(const char *filename) >> goto cleanup; >> } >> >> + err = set_global_vars(tobj, env.presets, env.npresets); >> + if (err) { >> + fprintf(stderr, "Failed to set global variables\n"); >> + goto cleanup; >> + } >> + >> lprog = NULL; >> bpf_object__for_each_program(tprog, tobj) { >> const char *tprog_name = bpf_program__name(tprog); >> -- >> 2.48.1 >>
On 11/02/2025 01:24, Eduard Zingerman wrote: > On Mon, 2025-02-10 at 13:51 +0000, Mykyta Yatsenko wrote: > > [...] > >> @@ -363,6 +378,24 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) >> return -ENOMEM; >> env.filename_cnt++; >> break; >> + case 'G': { >> + static int presets_cap; >> + char *expr = strdup(arg); >> + >> + if (expr[0] == '@') { >> + if (parse_var_presets_from_file(expr + 1, &env.presets, >> + &presets_cap, &env.npresets)) { > Nit: I'd modify 'env' directly in parse_var_presets{,_from_file} and > add presets_cap field to 'env': to avoid static variable and to > avoid '(*presets)[*size].name = ...' below. > >> + fprintf(stderr, "Could not parse global variables preset: %s\n", >> + arg); >> + argp_usage(state); >> + } >> + } else if (parse_var_presets(expr, &env.presets, &presets_cap, &env.npresets)) { >> + fprintf(stderr, "Could not parse global variables preset: %s\n", arg); >> + argp_usage(state); >> + } >> + free(expr); >> + break; >> + } >> default: >> return ARGP_ERR_UNKNOWN; >> } >> @@ -1292,6 +1325,273 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf >> return 0; >> }; >> >> +static int parse_var_presets(char *expr, struct var_preset **presets, int *capacity, int *size) >> +{ >> + char *eq_ptr = strchr(expr, '='); >> + char *name_ptr = expr; >> + char *name_end = eq_ptr - 1; >> + char *val_ptr = eq_ptr + 1; >> + long long value; >> + >> + if (!eq_ptr) { >> + fprintf(stderr, "No assignment in expression\n"); >> + return -EINVAL; >> + } >> + >> + while (isspace(*name_ptr)) >> + ++name_ptr; >> + while (isspace(*name_end)) >> + --name_end; > I think this loop has to be capped by string start check, > otherwise for -G ' = 10' it might read some uninitialized memory. > >> + while (isspace(*val_ptr)) >> + ++val_ptr; >> + >> + if (name_ptr > name_end) { >> + fprintf(stderr, "Empty variable name in expression %s\n", expr); >> + return -EINVAL; >> + } >> + >> + if (*size >= *capacity) { >> + *capacity = max(*capacity * 2, 1); >> + *presets = realloc(*presets, *capacity * sizeof(**presets)); > Nit: if realloc() fails it returns NULL, > so the pointer to older *presets would be lost and never freed > (but we exit the program in case of an error, so not really an issue). > Also, check for NULL and return -ENOMEM? > >> + } >> + >> + if (isalpha(*val_ptr)) { >> + char *value_end = val_ptr + strlen(val_ptr) - 1; >> + >> + while (isspace(*value_end)) >> + --value_end; >> + *(value_end + 1) = '\0'; >> + >> + (*presets)[*size].svalue = strdup(val_ptr); > Silly question, why strdup here and for .name? > Keeping pointers to argv should be fine as far as I know. Right, It seems like I don't really need to make this copy. Although, I'm going to follow up what Andrii suggested and use sscanf, in that case copy will be needed. >> + (*presets)[*size].type = NAME; >> + } else if (*val_ptr == '-' || isdigit(*val_ptr)) { >> + errno = 0; >> + value = strtoll(val_ptr, NULL, 0); >> + if (errno == ERANGE) { >> + errno = 0; >> + value = strtoull(val_ptr, NULL, 0); >> + } >> + (*presets)[*size].ivalue = value; >> + (*presets)[*size].type = INTEGRAL; >> + if (errno) { >> + fprintf(stderr, "Could not parse integer value %s\n", val_ptr); >> + return -EINVAL; >> + } >> + } else { >> + fprintf(stderr, "Could not parse value %s\n", val_ptr); >> + return -EINVAL; >> + } >> + *(name_end + 1) = '\0'; >> + (*presets)[*size].name = strdup(name_ptr); >> + (*size)++; >> + return 0; >> +} >> + >> +static int parse_var_presets_from_file(const char *filename, struct var_preset **presets, >> + int *capacity, int *size) > Thank you for adding this! > >> +{ >> + FILE *f; >> + char line[256]; >> + int err = 0; >> + >> + f = fopen(filename, "rt"); >> + if (!f) { >> + fprintf(stderr, "Could not open file %s\n", filename); >> + return -EINVAL; >> + } >> + >> + while (fgets(line, sizeof(line), f)) { >> + int err = parse_var_presets(line, presets, capacity, size); >> + >> + if (err) >> + goto cleanup; >> + } >> + >> +cleanup: > Nit: I'd check for ferror(f) and write something to stderr here. > >> + fclose(f); >> + return err; >> +} >> + >> +static bool is_signed_type(const struct btf_type *t) >> +{ >> + if (btf_is_int(t)) >> + return btf_int_encoding(t) & BTF_INT_SIGNED; >> + if (btf_is_enum(t)) >> + return btf_kflag(t); >> + return true; >> +} > [...] > >> +static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, int npresets) >> +{ >> + struct btf_var_secinfo *sinfo; >> + const char *sec_name; >> + const struct btf_type *type; >> + struct bpf_map *map; >> + struct btf *btf; >> + bool *set_var; >> + int i, j, k, n, cnt, err = 0; >> + >> + if (npresets == 0) >> + return 0; >> + >> + btf = bpf_object__btf(obj); >> + if (!btf) >> + return -EINVAL; >> + >> + set_var = calloc(npresets, sizeof(bool)); >> + for (i = 0; i < npresets; ++i) >> + set_var[i] = false; > As Andrii writes in a sibling thread, I'd keep this flag in the > 'struct var_preset'. > >> + >> + cnt = btf__type_cnt(btf); >> + for (i = 0; i != cnt; ++i) { >> + type = btf__type_by_id(btf, i); >> + >> + if (!btf_is_datasec(type)) >> + continue; >> + >> + sinfo = btf_var_secinfos(type); >> + sec_name = btf__name_by_offset(btf, type->name_off); >> + map = bpf_object__find_map_by_name(obj, sec_name); >> + if (!map) >> + continue; >> + >> + n = btf_vlen(type); >> + for (j = 0; j < n; ++j, ++sinfo) { >> + const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type); >> + const char *var_name = btf__name_by_offset(btf, var_type->name_off); >> + >> + if (!btf_is_var(var_type)) >> + continue; >> + >> + for (k = 0; k < npresets; ++k) { >> + if (strcmp(var_name, presets[k].name) != 0) >> + continue; >> + >> + if (set_var[k]) { >> + fprintf(stderr, "Variable %s is set more than once", >> + var_name); >> + } >> + >> + err = set_global_var(obj, btf, var_type, map, sinfo, presets + k); >> + if (err) >> + goto out; >> + >> + set_var[k] = true; >> + break; >> + } >> + } >> + } >> + for (i = 0; i < npresets; ++i) { >> + if (!set_var[i]) { >> + fprintf(stderr, "Global variable preset %s has not been applied\n", >> + presets[i].name); >> + } >> + } >> +out: >> + free(set_var); >> + return err; >> +} >> + > [...] > Thanks, I'll apply your suggestions in v3.
On Tue, Feb 11, 2025 at 7:00 AM Mykyta Yatsenko <mykyta.yatsenko5@gmail.com> wrote: > > On 11/02/2025 01:13, Andrii Nakryiko wrote: > > On Mon, Feb 10, 2025 at 5:51 AM Mykyta Yatsenko > > <mykyta.yatsenko5@gmail.com> wrote: > >> From: Mykyta Yatsenko <yatsenko@meta.com> > >> > >> To better verify some complex BPF programs we'd like to preset global > >> variables. > >> This patch introduces CLI argument `--set-global-vars` or `-G` to > >> veristat, that allows presetting values to global variables defined > >> in BPF program. For example: > >> > >> prog.c: > >> ``` > >> enum Enum { ELEMENT1 = 0, ELEMENT2 = 5 }; > >> const volatile __s64 a = 5; > >> const volatile __u8 b = 5; > >> const volatile enum Enum c = ELEMENT2; > >> const volatile bool d = false; > >> > >> char arr[4] = {0}; > >> > >> SEC("tp_btf/sched_switch") > >> int BPF_PROG(...) > >> { > >> bpf_printk("%c\n", arr[a]); > >> bpf_printk("%c\n", arr[b]); > >> bpf_printk("%c\n", arr[c]); > >> bpf_printk("%c\n", arr[d]); > >> return 0; > >> } > >> ``` > >> By default verification of the program fails: > >> ``` > >> ./veristat prog.bpf.o > >> ``` > >> By presetting global variables, we can make verification pass: > >> ``` > >> ./veristat wq.bpf.o -G "a = 0" -G "b = 1" -G "c = 2" -G "d = 3" > >> ``` > >> > >> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com> > >> --- > >> tools/testing/selftests/bpf/veristat.c | 319 ++++++++++++++++++++++++- > >> 1 file changed, 307 insertions(+), 12 deletions(-) > >> > >> diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c > >> index 06af5029885b..b4521ebb6e6a 100644 > >> --- a/tools/testing/selftests/bpf/veristat.c > >> +++ b/tools/testing/selftests/bpf/veristat.c > >> @@ -154,6 +154,15 @@ struct filter { > >> bool abs; > >> }; > >> [...] > >> +static int enum_value_from_name(const struct btf *btf, const struct btf_type *t, > >> + const char *evalue, long long *retval) > >> +{ > >> + if (btf_is_enum(t)) { > >> + struct btf_enum *e = btf_enum(t); > >> + int i, n = btf_vlen(t); > >> + > >> + for (i = 0; i < n; ++i) { > >> + const char *cur_name = btf__name_by_offset(btf, e[i].name_off); > >> + > >> + if (strcmp(cur_name, evalue) == 0) { > >> + *retval = e[i].val; > >> + return 0; > >> + } > >> + } > >> + } else if (btf_is_enum64(t)) { > >> + struct btf_enum64 *e = btf_enum64(t); > >> + int i, n = btf_vlen(t); > >> + > >> + for (i = 0; i < n; ++i) { > >> + struct btf_enum64 *cur = e + i; > >> + const char *cur_name = btf__name_by_offset(btf, cur->name_off); > > you have two conceptually identical loops, but in one you do `cur = e > > + i` and in another you do `e[i]` access... why? > The difference is that for e64 case we get value by the > `btf_enum64_value` function, which accepts pointer to `btf_enum64`, > I think it is a bit cleaner to have an explicit assignment `struct > btf_enum64 *cur = e + i;`, instead of passing `&e[i]` > into btf_enum64_value. Though, let's make both loops more consistent. I'd just do `e++` inside for() and get rid of cur altogether. > >> + __u64 value = btf_enum64_value(cur); > >> + > >> + if (strcmp(cur_name, evalue) == 0) { > >> + *retval = value; [...] > >> + } > >> + > >> + /* Check if value fits into the target variable size */ > >> + if (sinfo->size < sizeof(preset->ivalue)) { > >> + bool is_signed = is_signed_type(base_type); > >> + __u32 unsigned_bits = sinfo->size * 8 - (is_signed ? 1 : 0); > >> + long long max_val = 1ll << unsigned_bits; > > what about u64? 1 << 64 ? > > This should not be executed for u64, check `if (sinfo->size < > sizeof(preset->ivalue))` is there for that. ah, missed that check, ok > > > > >> + > >> + if (preset->ivalue >= max_val || preset->ivalue < -max_val) { > >> + fprintf(stderr, > >> + "Variable %s value %lld is out of range [%lld; %lld]\n", > >> + btf__name_by_offset(btf, t->name_off), preset->ivalue, > >> + is_signed ? -max_val : 0, max_val - 1); > >> + return -EINVAL; > >> + } > >> + } > >> + [...]
On Mon, Feb 10, 2025 at 5:44 PM Eduard Zingerman <eddyz87@gmail.com> wrote: > > On Mon, 2025-02-10 at 17:13 -0800, Andrii Nakryiko wrote: > > [...] > > > > @@ -1334,17 +1634,6 @@ static int process_obj(const char *filename) > > > > > > env.files_processed++; > > > > > > - bpf_object__for_each_program(prog, obj) { > > > - prog_cnt++; > > > - } > > > - > > > - if (prog_cnt == 1) { > > > - prog = bpf_object__next_program(obj, NULL); > > > - bpf_program__set_autoload(prog, true); > > > - process_prog(filename, obj, prog); > > > - goto cleanup; > > > - } > > > - > > > > I think this was an optimization to avoid a heavy-weight ELF parsing > > twice, why would we want to remove it?.. > > > > pw-bot: cr > > The v1 of this patch missed the case that globals have to be set in > both cases, when prog_cnt == 1 and prog_cnt != 1. I remember making > same mistake when debugging something unrelated. Hence I suggested > removing this special case. > Yeah, it's a bit of a gotcha for sure, but especially for something big like pyperf600 it does make a difference... I have a plan in mind to speed this up with a bit more work on libbpf-side APIs (bpf_object__prepare() API I mentioned in another thread a few days ago), so we'll be able to get rid of this. > > > bpf_object__for_each_program(prog, obj) { > > > const char *prog_name = bpf_program__name(prog); > > > >
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 06af5029885b..b4521ebb6e6a 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -154,6 +154,15 @@ struct filter { bool abs; }; +struct var_preset { + char *name; + enum { INTEGRAL, NAME } type; + union { + long long ivalue; + char *svalue; + }; +}; + static struct env { char **filenames; int filename_cnt; @@ -195,6 +204,8 @@ static struct env { int progs_processed; int progs_skipped; int top_src_lines; + struct var_preset *presets; + int npresets; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -246,12 +257,16 @@ static const struct argp_option opts[] = { { "test-reg-invariants", 'r', NULL, 0, "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, { "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" }, + { "set-global-vars", 'G', "GLOBALS", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" }, {}, }; static int parse_stats(const char *stats_str, struct stat_specs *specs); static int append_filter(struct filter **filters, int *cnt, const char *str); static int append_filter_file(const char *path); +static int parse_var_presets(char *expr, struct var_preset **presets, int *capacity, int *size); +static int parse_var_presets_from_file(const char *filename, struct var_preset **presets, + int *capacity, int *size); static error_t parse_arg(int key, char *arg, struct argp_state *state) { @@ -363,6 +378,24 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return -ENOMEM; env.filename_cnt++; break; + case 'G': { + static int presets_cap; + char *expr = strdup(arg); + + if (expr[0] == '@') { + if (parse_var_presets_from_file(expr + 1, &env.presets, + &presets_cap, &env.npresets)) { + fprintf(stderr, "Could not parse global variables preset: %s\n", + arg); + argp_usage(state); + } + } else if (parse_var_presets(expr, &env.presets, &presets_cap, &env.npresets)) { + fprintf(stderr, "Could not parse global variables preset: %s\n", arg); + argp_usage(state); + } + free(expr); + break; + } default: return ARGP_ERR_UNKNOWN; } @@ -1292,6 +1325,273 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf return 0; }; +static int parse_var_presets(char *expr, struct var_preset **presets, int *capacity, int *size) +{ + char *eq_ptr = strchr(expr, '='); + char *name_ptr = expr; + char *name_end = eq_ptr - 1; + char *val_ptr = eq_ptr + 1; + long long value; + + if (!eq_ptr) { + fprintf(stderr, "No assignment in expression\n"); + return -EINVAL; + } + + while (isspace(*name_ptr)) + ++name_ptr; + while (isspace(*name_end)) + --name_end; + while (isspace(*val_ptr)) + ++val_ptr; + + if (name_ptr > name_end) { + fprintf(stderr, "Empty variable name in expression %s\n", expr); + return -EINVAL; + } + + if (*size >= *capacity) { + *capacity = max(*capacity * 2, 1); + *presets = realloc(*presets, *capacity * sizeof(**presets)); + } + + if (isalpha(*val_ptr)) { + char *value_end = val_ptr + strlen(val_ptr) - 1; + + while (isspace(*value_end)) + --value_end; + *(value_end + 1) = '\0'; + + (*presets)[*size].svalue = strdup(val_ptr); + (*presets)[*size].type = NAME; + } else if (*val_ptr == '-' || isdigit(*val_ptr)) { + errno = 0; + value = strtoll(val_ptr, NULL, 0); + if (errno == ERANGE) { + errno = 0; + value = strtoull(val_ptr, NULL, 0); + } + (*presets)[*size].ivalue = value; + (*presets)[*size].type = INTEGRAL; + if (errno) { + fprintf(stderr, "Could not parse integer value %s\n", val_ptr); + return -EINVAL; + } + } else { + fprintf(stderr, "Could not parse value %s\n", val_ptr); + return -EINVAL; + } + *(name_end + 1) = '\0'; + (*presets)[*size].name = strdup(name_ptr); + (*size)++; + return 0; +} + +static int parse_var_presets_from_file(const char *filename, struct var_preset **presets, + int *capacity, int *size) +{ + FILE *f; + char line[256]; + int err = 0; + + f = fopen(filename, "rt"); + if (!f) { + fprintf(stderr, "Could not open file %s\n", filename); + return -EINVAL; + } + + while (fgets(line, sizeof(line), f)) { + int err = parse_var_presets(line, presets, capacity, size); + + if (err) + goto cleanup; + } + +cleanup: + fclose(f); + return err; +} + +static bool is_signed_type(const struct btf_type *t) +{ + if (btf_is_int(t)) + return btf_int_encoding(t) & BTF_INT_SIGNED; + if (btf_is_enum(t)) + return btf_kflag(t); + return true; +} + +static int enum_value_from_name(const struct btf *btf, const struct btf_type *t, + const char *evalue, long long *retval) +{ + if (btf_is_enum(t)) { + struct btf_enum *e = btf_enum(t); + int i, n = btf_vlen(t); + + for (i = 0; i < n; ++i) { + const char *cur_name = btf__name_by_offset(btf, e[i].name_off); + + if (strcmp(cur_name, evalue) == 0) { + *retval = e[i].val; + return 0; + } + } + } else if (btf_is_enum64(t)) { + struct btf_enum64 *e = btf_enum64(t); + int i, n = btf_vlen(t); + + for (i = 0; i < n; ++i) { + struct btf_enum64 *cur = e + i; + const char *cur_name = btf__name_by_offset(btf, cur->name_off); + __u64 value = btf_enum64_value(cur); + + if (strcmp(cur_name, evalue) == 0) { + *retval = value; + return 0; + } + } + } + return -EINVAL; +} + +static bool is_preset_supported(const struct btf_type *t) +{ + return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t); +} + +static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct btf_type *t, + struct bpf_map *map, struct btf_var_secinfo *sinfo, + struct var_preset *preset) +{ + const struct btf_type *base_type; + void *ptr; + size_t size; + + base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); + if (!base_type) { + fprintf(stderr, "Could not resolve type %d\n", t->type); + return -EINVAL; + } + if (!is_preset_supported(base_type)) { + fprintf(stderr, "Setting global variable for btf kind %d is not supported\n", + btf_kind(base_type)); + return -EINVAL; + } + + if (preset->type == NAME && btf_is_any_enum(base_type)) { + /* Convert enum element name into integer */ + long long ivalue; + + if (enum_value_from_name(btf, base_type, preset->svalue, &ivalue) != 0) { + fprintf(stderr, "Could not find integer value for enum element %s\n", + preset->svalue); + return -EINVAL; + } + free(preset->svalue); + preset->ivalue = ivalue; + preset->type = INTEGRAL; + } + + /* Check if value fits into the target variable size */ + if (sinfo->size < sizeof(preset->ivalue)) { + bool is_signed = is_signed_type(base_type); + __u32 unsigned_bits = sinfo->size * 8 - (is_signed ? 1 : 0); + long long max_val = 1ll << unsigned_bits; + + if (preset->ivalue >= max_val || preset->ivalue < -max_val) { + fprintf(stderr, + "Variable %s value %lld is out of range [%lld; %lld]\n", + btf__name_by_offset(btf, t->name_off), preset->ivalue, + is_signed ? -max_val : 0, max_val - 1); + return -EINVAL; + } + } + + ptr = (void *)bpf_map__initial_value(map, &size); + if (!ptr || (sinfo->offset + sinfo->size > size)) + return -EINVAL; + + if (__BYTE_ORDER == __LITTLE_ENDIAN) { + memcpy(ptr + sinfo->offset, &preset->ivalue, sinfo->size); + } else if (__BYTE_ORDER == __BIG_ENDIAN) { + __u8 src_offset = sizeof(preset->ivalue) - sinfo->size; + + memcpy(ptr + sinfo->offset, (void *)&preset->ivalue + src_offset, sinfo->size); + } + return 0; +} + +static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, int npresets) +{ + struct btf_var_secinfo *sinfo; + const char *sec_name; + const struct btf_type *type; + struct bpf_map *map; + struct btf *btf; + bool *set_var; + int i, j, k, n, cnt, err = 0; + + if (npresets == 0) + return 0; + + btf = bpf_object__btf(obj); + if (!btf) + return -EINVAL; + + set_var = calloc(npresets, sizeof(bool)); + for (i = 0; i < npresets; ++i) + set_var[i] = false; + + cnt = btf__type_cnt(btf); + for (i = 0; i != cnt; ++i) { + type = btf__type_by_id(btf, i); + + if (!btf_is_datasec(type)) + continue; + + sinfo = btf_var_secinfos(type); + sec_name = btf__name_by_offset(btf, type->name_off); + map = bpf_object__find_map_by_name(obj, sec_name); + if (!map) + continue; + + n = btf_vlen(type); + for (j = 0; j < n; ++j, ++sinfo) { + const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type); + const char *var_name = btf__name_by_offset(btf, var_type->name_off); + + if (!btf_is_var(var_type)) + continue; + + for (k = 0; k < npresets; ++k) { + if (strcmp(var_name, presets[k].name) != 0) + continue; + + if (set_var[k]) { + fprintf(stderr, "Variable %s is set more than once", + var_name); + } + + err = set_global_var(obj, btf, var_type, map, sinfo, presets + k); + if (err) + goto out; + + set_var[k] = true; + break; + } + } + } + for (i = 0; i < npresets; ++i) { + if (!set_var[i]) { + fprintf(stderr, "Global variable preset %s has not been applied\n", + presets[i].name); + } + } +out: + free(set_var); + return err; +} + static int process_obj(const char *filename) { const char *base_filename = basename(strdupa(filename)); @@ -1299,7 +1599,7 @@ static int process_obj(const char *filename) struct bpf_program *prog, *tprog, *lprog; libbpf_print_fn_t old_libbpf_print_fn; LIBBPF_OPTS(bpf_object_open_opts, opts); - int err = 0, prog_cnt = 0; + int err = 0; if (!should_process_file_prog(base_filename, NULL)) { if (env.verbose) @@ -1334,17 +1634,6 @@ static int process_obj(const char *filename) env.files_processed++; - bpf_object__for_each_program(prog, obj) { - prog_cnt++; - } - - if (prog_cnt == 1) { - prog = bpf_object__next_program(obj, NULL); - bpf_program__set_autoload(prog, true); - process_prog(filename, obj, prog); - goto cleanup; - } - bpf_object__for_each_program(prog, obj) { const char *prog_name = bpf_program__name(prog); @@ -1355,6 +1644,12 @@ static int process_obj(const char *filename) goto cleanup; } + err = set_global_vars(tobj, env.presets, env.npresets); + if (err) { + fprintf(stderr, "Failed to set global variables\n"); + goto cleanup; + } + lprog = NULL; bpf_object__for_each_program(tprog, tobj) { const char *tprog_name = bpf_program__name(tprog);