Message ID | 20230814172820.1362751-1-yonghong.song@linux.dev (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | Add support for local percpu kptr | expand |
On 8/14/23 1:28 PM, Yonghong Song wrote: > BPF_KPTR_PERCPU_REF represents a percpu field type like below > > struct val_t { > ... fields ... > }; > struct t { > ... > struct val_t __percpu *percpu_data_ptr; > ... > }; > > where > #define __percpu __attribute__((btf_type_tag("percpu"))) nit: Maybe this should be __percpu_kptr (and similar for the actual tag)? I don't feel strongly about this. It's certainly less concise, but given that existing docs mention kptrs a few times, and anyone using percpu kptrs can probably be expected to have some familiarity with "normal" kptrs, making it more clear to BPF program writers that their existing mental model of what a kptr is and how it should be used seems beneficial. > > While BPF_KPTR_REF points to a trusted kernel object or a trusted > local object, BPF_KPTR_PERCPU_REF points to a trusted local > percpu object. > > This patch added basic support for BPF_KPTR_PERCPU_REF > related to percpu field parsing, recording and free operations. > BPF_KPTR_PERCPU_REF also supports the same map types > as BPF_KPTR_REF does. > > Note that unlike a local kptr, it is possible that > a BPF_KTPR_PERCUP_REF struct may not contain any nit: typo here ("BPF_KTPR_PERCUP_REF" -> "BPF_KPTR_PERCPU_REF") > special fields like other kptr, bpf_spin_lock, bpf_list_head, etc. 
> > Signed-off-by: Yonghong Song <yonghong.song@linux.dev> > --- > include/linux/bpf.h | 18 ++++++++++++------ > kernel/bpf/btf.c | 5 +++++ > kernel/bpf/syscall.c | 7 ++++++- > 3 files changed, 23 insertions(+), 7 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 60e80e90c37d..e6348fd0a785 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -180,14 +180,15 @@ enum btf_field_type { > BPF_TIMER = (1 << 1), > BPF_KPTR_UNREF = (1 << 2), > BPF_KPTR_REF = (1 << 3), > - BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, > - BPF_LIST_HEAD = (1 << 4), > - BPF_LIST_NODE = (1 << 5), > - BPF_RB_ROOT = (1 << 6), > - BPF_RB_NODE = (1 << 7), > + BPF_KPTR_PERCPU_REF = (1 << 4), > + BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU_REF, > + BPF_LIST_HEAD = (1 << 5), > + BPF_LIST_NODE = (1 << 6), > + BPF_RB_ROOT = (1 << 7), > + BPF_RB_NODE = (1 << 8), > BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | > BPF_RB_NODE | BPF_RB_ROOT, > - BPF_REFCOUNT = (1 << 8), > + BPF_REFCOUNT = (1 << 9), > }; > > typedef void (*btf_dtor_kfunc_t)(void *); > @@ -300,6 +301,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) > case BPF_KPTR_UNREF: > case BPF_KPTR_REF: > return "kptr"; > + case BPF_KPTR_PERCPU_REF: > + return "kptr_percpu"; > case BPF_LIST_HEAD: > return "bpf_list_head"; > case BPF_LIST_NODE: > @@ -325,6 +328,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type) > return sizeof(struct bpf_timer); > case BPF_KPTR_UNREF: > case BPF_KPTR_REF: > + case BPF_KPTR_PERCPU_REF: > return sizeof(u64); > case BPF_LIST_HEAD: > return sizeof(struct bpf_list_head); > @@ -351,6 +355,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type) > return __alignof__(struct bpf_timer); > case BPF_KPTR_UNREF: > case BPF_KPTR_REF: > + case BPF_KPTR_PERCPU_REF: > return __alignof__(u64); > case BPF_LIST_HEAD: > return __alignof__(struct bpf_list_head); > @@ -389,6 +394,7 @@ static inline void 
bpf_obj_init_field(const struct btf_field *field, void *addr) > case BPF_TIMER: > case BPF_KPTR_UNREF: > case BPF_KPTR_REF: > + case BPF_KPTR_PERCPU_REF: > break; > default: > WARN_ON_ONCE(1); > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c > index 249657c466dd..bc1792edc64e 100644 > --- a/kernel/bpf/btf.c > +++ b/kernel/bpf/btf.c > @@ -3293,6 +3293,8 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, > type = BPF_KPTR_UNREF; > else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) > type = BPF_KPTR_REF; > + else if (!strcmp("percpu", __btf_name_by_offset(btf, t->name_off))) > + type = BPF_KPTR_PERCPU_REF; > else > return -EINVAL; > > @@ -3457,6 +3459,7 @@ static int btf_find_struct_field(const struct btf *btf, > break; > case BPF_KPTR_UNREF: > case BPF_KPTR_REF: > + case BPF_KPTR_PERCPU_REF: > ret = btf_find_kptr(btf, member_type, off, sz, > idx < info_cnt ? &info[idx] : &tmp); > if (ret < 0) > @@ -3523,6 +3526,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, > break; > case BPF_KPTR_UNREF: > case BPF_KPTR_REF: > + case BPF_KPTR_PERCPU_REF: > ret = btf_find_kptr(btf, var_type, off, sz, > idx < info_cnt ? 
&info[idx] : &tmp); > if (ret < 0) > @@ -3783,6 +3787,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type > break; > case BPF_KPTR_UNREF: > case BPF_KPTR_REF: > + case BPF_KPTR_PERCPU_REF: > ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]); > if (ret < 0) > goto end; > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 7f4e8c357a6a..1c30b6ee84d4 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -514,6 +514,7 @@ void btf_record_free(struct btf_record *rec) > switch (rec->fields[i].type) { > case BPF_KPTR_UNREF: > case BPF_KPTR_REF: > + case BPF_KPTR_PERCPU_REF: > if (rec->fields[i].kptr.module) > module_put(rec->fields[i].kptr.module); > btf_put(rec->fields[i].kptr.btf); > @@ -560,6 +561,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec) > switch (fields[i].type) { > case BPF_KPTR_UNREF: > case BPF_KPTR_REF: > + case BPF_KPTR_PERCPU_REF: > btf_get(fields[i].kptr.btf); > if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { > ret = -ENXIO; > @@ -650,6 +652,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) > WRITE_ONCE(*(u64 *)field_ptr, 0); > break; > case BPF_KPTR_REF: > + case BPF_KPTR_PERCPU_REF: > xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0); > if (!xchgd_field) > break; > @@ -657,7 +660,8 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) > if (!btf_is_kernel(field->kptr.btf)) { > pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, > field->kptr.btf_id); > - WARN_ON_ONCE(!pointee_struct_meta); > + if (field->type != BPF_KPTR_PERCPU_REF) > + WARN_ON_ONCE(!pointee_struct_meta); > migrate_disable(); > __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? 
> pointee_struct_meta->record : > @@ -1046,6 +1050,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, > break; > case BPF_KPTR_UNREF: > case BPF_KPTR_REF: > + case BPF_KPTR_PERCPU_REF: > case BPF_REFCOUNT: > if (map->map_type != BPF_MAP_TYPE_HASH && > map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
On Fri, Aug 18, 2023 at 02:37:41PM -0400, David Marchevsky wrote: > On 8/14/23 1:28 PM, Yonghong Song wrote: > > BPF_KPTR_PERCPU_REF represents a percpu field type like below > > > > struct val_t { > > ... fields ... > > }; > > struct t { > > ... > > struct val_t __percpu *percpu_data_ptr; > > ... > > }; > > > > where > > #define __percpu __attribute__((btf_type_tag("percpu"))) > > nit: Maybe this should be __percpu_kptr (and similar for the actual tag)? +1. I think it might conflict with kernel: include/linux/compiler_types.h:# define __percpu BTF_TYPE_TAG(percpu) It's the same tag name, but the kernel semantics are different from our kptr semantics inside bpf prog. I think we have to use a different tag like: #define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr"))) > > index 60e80e90c37d..e6348fd0a785 100644 > > --- a/include/linux/bpf.h > > +++ b/include/linux/bpf.h > > @@ -180,14 +180,15 @@ enum btf_field_type { > > BPF_TIMER = (1 << 1), > > BPF_KPTR_UNREF = (1 << 2), > > BPF_KPTR_REF = (1 << 3), > > - BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, > > - BPF_LIST_HEAD = (1 << 4), > > - BPF_LIST_NODE = (1 << 5), > > - BPF_RB_ROOT = (1 << 6), > > - BPF_RB_NODE = (1 << 7), > > + BPF_KPTR_PERCPU_REF = (1 << 4), I think _REF is redundant here. _UNREF is obsolete. We might remove it and rename BPF_KPTR_REF to just BPF_KPTR. BPF_KPTR_PERCPU should be clear enough.
On 8/18/23 11:37 AM, David Marchevsky wrote: > On 8/14/23 1:28 PM, Yonghong Song wrote: >> BPF_KPTR_PERCPU_REF represents a percpu field type like below >> >> struct val_t { >> ... fields ... >> }; >> struct t { >> ... >> struct val_t __percpu *percpu_data_ptr; >> ... >> }; >> >> where >> #define __percpu __attribute__((btf_type_tag("percpu"))) > > nit: Maybe this should be __percpu_kptr (and similar for the actual tag)? > > I don't feel strongly about this. It's certainly less concise, but given that > existing docs mention kptrs a few times, and anyone using percpu kptrs can > probably be expected to have some familiarity with "normal" kptrs, making > it more clear to BPF program writers that their existing mental model of > what a kptr is and how it should be used seems beneficial. Thanks for suggestion. As Alexei suggested later as well, __percpu_kptr is better than __percpu so users won't be confused with kernel __percpu tag. > >> >> While BPF_KPTR_REF points to a trusted kernel object or a trusted >> local object, BPF_KPTR_PERCPU_REF points to a trusted local >> percpu object. >> >> This patch added basic support for BPF_KPTR_PERCPU_REF >> related to percpu field parsing, recording and free operations. >> BPF_KPTR_PERCPU_REF also supports the same map types >> as BPF_KPTR_REF does. >> >> Note that unlike a local kptr, it is possible that >> a BPF_KTPR_PERCUP_REF struct may not contain any > > nit: typo here ("BPF_KTPR_PERCUP_REF" -> "BPF_KPTR_PERCPU_REF") Ack. Thanks! > >> special fields like other kptr, bpf_spin_lock, bpf_list_head, etc. >> >> Signed-off-by: Yonghong Song <yonghong.song@linux.dev> >> --- >> include/linux/bpf.h | 18 ++++++++++++------ >> kernel/bpf/btf.c | 5 +++++ >> kernel/bpf/syscall.c | 7 ++++++- >> 3 files changed, 23 insertions(+), 7 deletions(-) >> [...]
On 8/18/23 4:24 PM, Alexei Starovoitov wrote: > On Fri, Aug 18, 2023 at 02:37:41PM -0400, David Marchevsky wrote: >> On 8/14/23 1:28 PM, Yonghong Song wrote: >>> BPF_KPTR_PERCPU_REF represents a percpu field type like below >>> >>> struct val_t { >>> ... fields ... >>> }; >>> struct t { >>> ... >>> struct val_t __percpu *percpu_data_ptr; >>> ... >>> }; >>> >>> where >>> #define __percpu __attribute__((btf_type_tag("percpu"))) >> >> nit: Maybe this should be __percpu_kptr (and similar for the actual tag)? > > +1. > > I think it might conflict with kernel: > include/linux/compiler_types.h:# define __percpu BTF_TYPE_TAG(percpu) > It's the same tag name, but the kernel semantics are different from our kptr > semantics inside bpf prog. > I think we have to use a different tag like: > #define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr"))) Agree. Will use __percpu_kptr in the next revision. > >>> index 60e80e90c37d..e6348fd0a785 100644 >>> --- a/include/linux/bpf.h >>> +++ b/include/linux/bpf.h >>> @@ -180,14 +180,15 @@ enum btf_field_type { >>> BPF_TIMER = (1 << 1), >>> BPF_KPTR_UNREF = (1 << 2), >>> BPF_KPTR_REF = (1 << 3), >>> - BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, >>> - BPF_LIST_HEAD = (1 << 4), >>> - BPF_LIST_NODE = (1 << 5), >>> - BPF_RB_ROOT = (1 << 6), >>> - BPF_RB_NODE = (1 << 7), >>> + BPF_KPTR_PERCPU_REF = (1 << 4), > > I think _REF is redundant here. _UNREF is obsolete. We might remove it and > rename BPF_KPTR_REF to just BPF_KPTR. > BPF_KPTR_PERCPU should be clear enough. Okay, will use BPF_KPTR_PERCPU in the next revision.
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 60e80e90c37d..e6348fd0a785 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -180,14 +180,15 @@ enum btf_field_type { BPF_TIMER = (1 << 1), BPF_KPTR_UNREF = (1 << 2), BPF_KPTR_REF = (1 << 3), - BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, - BPF_LIST_HEAD = (1 << 4), - BPF_LIST_NODE = (1 << 5), - BPF_RB_ROOT = (1 << 6), - BPF_RB_NODE = (1 << 7), + BPF_KPTR_PERCPU_REF = (1 << 4), + BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU_REF, + BPF_LIST_HEAD = (1 << 5), + BPF_LIST_NODE = (1 << 6), + BPF_RB_ROOT = (1 << 7), + BPF_RB_NODE = (1 << 8), BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | BPF_RB_NODE | BPF_RB_ROOT, - BPF_REFCOUNT = (1 << 8), + BPF_REFCOUNT = (1 << 9), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -300,6 +301,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; + case BPF_KPTR_PERCPU_REF: + return "kptr_percpu"; case BPF_LIST_HEAD: return "bpf_list_head"; case BPF_LIST_NODE: @@ -325,6 +328,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU_REF: return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); @@ -351,6 +355,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU_REF: return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); @@ -389,6 +394,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_TIMER: case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU_REF: break; default: WARN_ON_ONCE(1); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 249657c466dd..bc1792edc64e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3293,6 +3293,8 @@ static int 
btf_find_kptr(const struct btf *btf, const struct btf_type *t, type = BPF_KPTR_UNREF; else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) type = BPF_KPTR_REF; + else if (!strcmp("percpu", __btf_name_by_offset(btf, t->name_off))) + type = BPF_KPTR_PERCPU_REF; else return -EINVAL; @@ -3457,6 +3459,7 @@ static int btf_find_struct_field(const struct btf *btf, break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU_REF: ret = btf_find_kptr(btf, member_type, off, sz, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) @@ -3523,6 +3526,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU_REF: ret = btf_find_kptr(btf, var_type, off, sz, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) @@ -3783,6 +3787,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU_REF: ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]); if (ret < 0) goto end; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7f4e8c357a6a..1c30b6ee84d4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -514,6 +514,7 @@ void btf_record_free(struct btf_record *rec) switch (rec->fields[i].type) { case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU_REF: if (rec->fields[i].kptr.module) module_put(rec->fields[i].kptr.module); btf_put(rec->fields[i].kptr.btf); @@ -560,6 +561,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec) switch (fields[i].type) { case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU_REF: btf_get(fields[i].kptr.btf); if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { ret = -ENXIO; @@ -650,6 +652,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) WRITE_ONCE(*(u64 *)field_ptr, 0); break; case BPF_KPTR_REF: + case BPF_KPTR_PERCPU_REF: xchgd_field = (void *)xchg((unsigned 
long *)field_ptr, 0); if (!xchgd_field) break; @@ -657,7 +660,8 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) if (!btf_is_kernel(field->kptr.btf)) { pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, field->kptr.btf_id); - WARN_ON_ONCE(!pointee_struct_meta); + if (field->type != BPF_KPTR_PERCPU_REF) + WARN_ON_ONCE(!pointee_struct_meta); migrate_disable(); __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? pointee_struct_meta->record : @@ -1046,6 +1050,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU_REF: case BPF_REFCOUNT: if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
BPF_KPTR_PERCPU_REF represents a percpu field type like below struct val_t { ... fields ... }; struct t { ... struct val_t __percpu *percpu_data_ptr; ... }; where #define __percpu __attribute__((btf_type_tag("percpu"))) While BPF_KPTR_REF points to a trusted kernel object or a trusted local object, BPF_KPTR_PERCPU_REF points to a trusted local percpu object. This patch added basic support for BPF_KPTR_PERCPU_REF related to percpu field parsing, recording and free operations. BPF_KPTR_PERCPU_REF also supports the same map types as BPF_KPTR_REF does. Note that unlike a local kptr, it is possible that a BPF_KPTR_PERCPU_REF struct may not contain any special fields like other kptr, bpf_spin_lock, bpf_list_head, etc. Signed-off-by: Yonghong Song <yonghong.song@linux.dev> --- include/linux/bpf.h | 18 ++++++++++++------ kernel/bpf/btf.c | 5 +++++ kernel/bpf/syscall.c | 7 ++++++- 3 files changed, 23 insertions(+), 7 deletions(-)