Message ID | 2954ab26de09afeecf3a56ba93624f9629072102.1653600578.git.lorenzo@kernel.org (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | net: netfilter: add kfunc helper to update ct timeout | expand |
Lorenzo Bianconi <lorenzo@kernel.org> wrote: > From: Kumar Kartikeya Dwivedi <memxor@gmail.com> > > Since we want to allow user to set some fields in nf_conn after it is > allocated but before it is inserted, we can permit BPF_WRITE for normal > nf_conn, and then mark return value as read only on insert, preventing > further BPF_WRITE. This way, nf_conn can be written to using normal > BPF instructions after allocation, but not after insertion. > > Note that we special nf_conn a bit here, inside the btf_struct_access > callback for XDP and TC programs. Since this is the only struct for > these programs requiring such adjustments, making this mechanism > more generic has been left as an exercise for a future patch adding > custom callbacks for more structs. Are you sure this is safe? As far as I can see this allows nf_conn->status = ~0ul. I'm fairly sure this isn't a good idea, see nf_ct_delete() for example.
Hi Lorenzo,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on bpf-next/master]
url: https://github.com/intel-lab-lkp/linux/commits/Lorenzo-Bianconi/net-netfilter-add-kfunc-helper-to-update-ct-timeout/20220527-053913
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: um-i386_defconfig (https://download.01.org/0day-ci/archive/20220527/202205270749.blol7EcF-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/c346565af9b023d9231ca8fca2e1b8c66a782f84
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Lorenzo-Bianconi/net-netfilter-add-kfunc-helper-to-update-ct-timeout/20220527-053913
git checkout c346565af9b023d9231ca8fca2e1b8c66a782f84
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=um SUBARCH=i386 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
net/core/filter.c: In function 'xdp_tc_btf_struct_access':
>> net/core/filter.c:10479:16: error: implicit declaration of function 'btf_struct_access'; did you mean 'xdp_tc_btf_struct_access'? [-Werror=implicit-function-declaration]
10479 | return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag);
| ^~~~~~~~~~~~~~~~~
| xdp_tc_btf_struct_access
cc1: some warnings being treated as errors
vim +10479 net/core/filter.c
10459
10460 static int xdp_tc_btf_struct_access(struct bpf_verifier_log *log,
10461 const struct btf *btf,
10462 const struct btf_type *t, int off, int size,
10463 enum bpf_access_type atype,
10464 u32 *next_btf_id, enum bpf_type_flag *flag)
10465 {
10466 int ret;
10467
10468 if (atype == BPF_READ || !READ_ONCE(nf_conn_btf_struct_access))
10469 goto end;
10470 mutex_lock(&nf_conn_btf_struct_access_mtx);
10471 if (!nf_conn_btf_struct_access)
10472 goto end_unlock;
10473 ret = nf_conn_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag);
10474 mutex_unlock(&nf_conn_btf_struct_access_mtx);
10475 return ret;
10476 end_unlock:
10477 mutex_unlock(&nf_conn_btf_struct_access_mtx);
10478 end:
10479 return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag);
10480 }
10481
Hi Lorenzo,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on bpf-next/master]
url: https://github.com/intel-lab-lkp/linux/commits/Lorenzo-Bianconi/net-netfilter-add-kfunc-helper-to-update-ct-timeout/20220527-053913
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: x86_64-randconfig-a005 (https://download.01.org/0day-ci/archive/20220527/202205271522.W0HxUVz1-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 134d7f9a4b97e9035150d970bd9e376043c4577e)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/c346565af9b023d9231ca8fca2e1b8c66a782f84
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Lorenzo-Bianconi/net-netfilter-add-kfunc-helper-to-update-ct-timeout/20220527-053913
git checkout c346565af9b023d9231ca8fca2e1b8c66a782f84
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash net/
If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
>> net/core/filter.c:10479:9: error: call to undeclared function 'btf_struct_access'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag);
^
1 error generated.
vim +/btf_struct_access +10479 net/core/filter.c
10459
10460 static int xdp_tc_btf_struct_access(struct bpf_verifier_log *log,
10461 const struct btf *btf,
10462 const struct btf_type *t, int off, int size,
10463 enum bpf_access_type atype,
10464 u32 *next_btf_id, enum bpf_type_flag *flag)
10465 {
10466 int ret;
10467
10468 if (atype == BPF_READ || !READ_ONCE(nf_conn_btf_struct_access))
10469 goto end;
10470 mutex_lock(&nf_conn_btf_struct_access_mtx);
10471 if (!nf_conn_btf_struct_access)
10472 goto end_unlock;
10473 ret = nf_conn_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag);
10474 mutex_unlock(&nf_conn_btf_struct_access_mtx);
10475 return ret;
10476 end_unlock:
10477 mutex_unlock(&nf_conn_btf_struct_access_mtx);
10478 end:
10479 return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag);
10480 }
10481
Hi Lorenzo, Thank you for the patch! Yet something to improve: [auto build test ERROR on bpf-next/master] url: https://github.com/intel-lab-lkp/linux/commits/Lorenzo-Bianconi/net-netfilter-add-kfunc-helper-to-update-ct-timeout/20220527-053913 base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master config: x86_64-rhel-8.3-kselftests (https://download.01.org/0day-ci/archive/20220527/202205271714.Uznf3vlP-lkp@intel.com/config) compiler: gcc-11 (Debian 11.3.0-1) 11.3.0 reproduce (this is a W=1 build): # https://github.com/intel-lab-lkp/linux/commit/c346565af9b023d9231ca8fca2e1b8c66a782f84 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Lorenzo-Bianconi/net-netfilter-add-kfunc-helper-to-update-ct-timeout/20220527-053913 git checkout c346565af9b023d9231ca8fca2e1b8c66a782f84 # save the config file mkdir build_dir && cp config build_dir/.config make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash If you fix the issue, kindly add following tag where applicable Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>, old ones prefixed by <<): >> ERROR: modpost: "btf_type_by_id" [net/netfilter/nf_conntrack.ko] undefined! >> ERROR: modpost: "nf_conn_btf_struct_access_mtx" [net/netfilter/nf_conntrack.ko] undefined! >> ERROR: modpost: "bpf_log" [net/netfilter/nf_conntrack.ko] undefined! >> ERROR: modpost: "nf_conn_btf_struct_access" [net/netfilter/nf_conntrack.ko] undefined!
On Fri, May 27, 2022 at 03:15:58AM IST, Florian Westphal wrote: > Lorenzo Bianconi <lorenzo@kernel.org> wrote: > > From: Kumar Kartikeya Dwivedi <memxor@gmail.com> > > > > Since we want to allow user to set some fields in nf_conn after it is > > allocated but before it is inserted, we can permit BPF_WRITE for normal > > nf_conn, and then mark return value as read only on insert, preventing > > further BPF_WRITE. This way, nf_conn can be written to using normal > > BPF instructions after allocation, but not after insertion. > > > > Note that we special nf_conn a bit here, inside the btf_struct_access > > callback for XDP and TC programs. Since this is the only struct for > > these programs requiring such adjustments, making this mechanism > > more generic has been left as an exercise for a future patch adding > > custom callbacks for more structs. > > Are you sure this is safe? > As far as I can see this allows nf_conn->status = ~0ul. > I'm fairly sure this isn't a good idea, see nf_ct_delete() for example. This only allows writing to an allocated but not yet inserted nf_conn. The idea was that insert checks whether ct->status only has permitted bits set before making the entry visible, and then we make nf_conn pointer read only, however the runtime check seems to be missing right now in patch 12; something to fix in v5. With that sorted, would it be fine? -- Kartikeya
Kumar Kartikeya Dwivedi <memxor@gmail.com> wrote: > On Fri, May 27, 2022 at 03:15:58AM IST, Florian Westphal wrote: > > Lorenzo Bianconi <lorenzo@kernel.org> wrote: > > > From: Kumar Kartikeya Dwivedi <memxor@gmail.com> > > > > > > Since we want to allow user to set some fields in nf_conn after it is > > > allocated but before it is inserted, we can permit BPF_WRITE for normal > > > nf_conn, and then mark return value as read only on insert, preventing > > > further BPF_WRITE. This way, nf_conn can be written to using normal > > > BPF instructions after allocation, but not after insertion. > > > > > > Note that we special nf_conn a bit here, inside the btf_struct_access > > > callback for XDP and TC programs. Since this is the only struct for > > > these programs requiring such adjustments, making this mechanism > > > more generic has been left as an exercise for a future patch adding > > > custom callbacks for more structs. > > > > Are you sure this is safe? > > As far as I can see this allows nf_conn->status = ~0ul. > > I'm fairly sure this isn't a good idea, see nf_ct_delete() for example. > > This only allows writing to an allocated but not yet inserted nf_conn. The idea > was that insert checks whether ct->status only has permitted bits set before > making the entry visible, and then we make nf_conn pointer read only, however > the runtime check seems to be missing right now in patch 12; something to fix in > v5. With that sorted, would it be fine? Its fragile, e.g. what if I set TEMPLATE bit? If refcount goes down to 0, object is released via kfree() instead of kmem_cache_free. What if I clear SNAT_DONE bit? Would it leave the (freed) entry on the bysource hash list (see nf_nat_core.c)? Or is there some magic that prevents this from happening? I have no idea how processing pipeline looks like...
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 49e3e7f4b0f9..bc5d8d0e63d1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -640,6 +640,12 @@ struct bpf_prog_ops { union bpf_attr __user *uattr); }; +typedef int (*btf_struct_access_t)(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, u32 *next_btf_id, + enum bpf_type_flag *flag); + struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto * @@ -660,11 +666,7 @@ struct bpf_verifier_ops { const struct bpf_insn *src, struct bpf_insn *dst, struct bpf_prog *prog, u32 *target_size); - int (*btf_struct_access)(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag); + btf_struct_access_t btf_struct_access; }; struct bpf_prog_offload_ops { diff --git a/include/linux/filter.h b/include/linux/filter.h index ed0c0ff42ad5..95cbf9b28927 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1389,6 +1389,9 @@ struct bpf_sk_lookup_kern { extern struct static_key_false bpf_sk_lookup_enabled; +extern btf_struct_access_t nf_conn_btf_struct_access; +extern struct mutex nf_conn_btf_struct_access_mtx; + /* Runners for BPF_SK_LOOKUP programs to invoke on socket lookup. * * Allowed return values for a BPF SK_LOOKUP program are SK_PASS and diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h index a473b56842c5..15c027549250 100644 --- a/include/net/netfilter/nf_conntrack_bpf.h +++ b/include/net/netfilter/nf_conntrack_bpf.h @@ -10,6 +10,7 @@ (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) extern int register_nf_conntrack_bpf(void); +extern void unregister_nf_conntrack_bpf(void); #else @@ -18,6 +19,10 @@ static inline int register_nf_conntrack_bpf(void) return 0; } +static inline void unregister_nf_conntrack_bpf(void) +{ +} + #endif #endif /* _NF_CONNTRACK_BPF_H */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9b8962e6bc14..8cc754d83521 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13342,6 +13342,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { bpf_convert_ctx_access_t convert_ctx_access; + enum bpf_prog_type prog_type; bool ctx_access; if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || @@ -13385,6 +13386,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (!ctx_access) continue; + prog_type = resolve_prog_type(env->prog); switch ((int)env->insn_aux_data[i + delta].ptr_type) { case PTR_TO_CTX: if (!ops->convert_ctx_access) @@ -13409,7 +13411,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code); env->prog->aux->num_exentries++; - } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) { + } else if (prog_type != BPF_PROG_TYPE_STRUCT_OPS && + prog_type != BPF_PROG_TYPE_XDP && + prog_type != BPF_PROG_TYPE_SCHED_CLS) { verbose(env, "Writes through BTF pointers are not allowed\n"); return -EINVAL; } diff --git a/net/core/filter.c b/net/core/filter.c index 5af58eb48587..bacc39eb9526 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -10453,6 +10453,32 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +btf_struct_access_t nf_conn_btf_struct_access; +/* Protects nf_conn_btf_struct_access */ +DEFINE_MUTEX(nf_conn_btf_struct_access_mtx); + +static int xdp_tc_btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, + u32 *next_btf_id, enum bpf_type_flag *flag) +{ + int ret; + + if (atype == BPF_READ || !READ_ONCE(nf_conn_btf_struct_access)) + goto end; + mutex_lock(&nf_conn_btf_struct_access_mtx); + if (!nf_conn_btf_struct_access) + goto end_unlock; + ret = nf_conn_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag); + mutex_unlock(&nf_conn_btf_struct_access_mtx); + return ret; +end_unlock: + mutex_unlock(&nf_conn_btf_struct_access_mtx); +end: + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag); +} + const struct bpf_verifier_ops sk_filter_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, @@ -10470,6 +10496,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, .gen_ld_abs = bpf_gen_ld_abs, + .btf_struct_access = xdp_tc_btf_struct_access, }; const struct bpf_prog_ops tc_cls_act_prog_ops = { @@ -10481,6 +10508,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = { .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, .gen_prologue = bpf_noop_prologue, + .btf_struct_access = xdp_tc_btf_struct_access, }; const struct bpf_prog_ops xdp_prog_ops = { diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 195ec68d309d..fbf58eb74c79 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -9,7 +9,9 @@ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/types.h> +#include <linux/filter.h> #include <linux/btf_ids.h> +#include <linux/bpf_verifier.h> #include <linux/net_namespace.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_bpf.h> @@ -257,10 +259,62 @@ static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = { .ret_null_set = &nf_ct_ret_null_kfunc_ids, }; +BTF_ID_LIST_SINGLE(nf_conn_btf_id, struct, nf_conn) + +static int nf_conn_bpf_btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int off, + int size, enum bpf_access_type atype, + u32 *next_btf_id, + enum bpf_type_flag *flag) +{ + const struct btf_type *nf_conn_t; + size_t end; + + WARN_ON_ONCE(atype != BPF_WRITE); + + nf_conn_t = btf_type_by_id(btf, nf_conn_btf_id[0]); + if (!nf_conn_t || nf_conn_t != t) { + bpf_log(log, "only read is supported"); + return -EACCES; + } + + switch (off) { + case offsetof(struct nf_conn, status): + end = offsetofend(struct nf_conn, status); + break; + default: + bpf_log(log, "no write support to nf_conn at off %d\n", off); + return -EACCES; + } + + if (off + size > end) { + bpf_log(log, + "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n", + off, size, end); + return -EACCES; + } + + return NOT_INIT; +} + int register_nf_conntrack_bpf(void) { int ret; ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set); + if (ret < 0) + return ret; + mutex_lock(&nf_conn_btf_struct_access_mtx); + nf_conn_btf_struct_access = nf_conn_bpf_btf_struct_access; + mutex_unlock(&nf_conn_btf_struct_access_mtx); + return 0; +} + +void unregister_nf_conntrack_bpf(void) +{ + mutex_lock(&nf_conn_btf_struct_access_mtx); + nf_conn_btf_struct_access = NULL; + mutex_unlock(&nf_conn_btf_struct_access_mtx); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 082a2fd8d85b..91f890972f9e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2497,6 +2497,8 @@ void nf_conntrack_cleanup_start(void) void nf_conntrack_cleanup_end(void) { + unregister_nf_conntrack_bpf(); + RCU_INIT_POINTER(nf_ct_hook, NULL); cancel_delayed_work_sync(&conntrack_gc_work.dwork); kvfree(nf_conntrack_hash);