Message ID | 5cff26f97e55161b7d56b09ddcf5f8888a5add1d.1689970773.git.dxu@dxuuu.xyz (mailing list archive) |
---|---|
State | Accepted |
Commit | 91721c2d02d3a0141df8a4787c7079b89b0d0607 |
Delegated to: | BPF |
Headers | show |
Series | Support defragmenting IPv(4|6) packets in BPF | expand |
On Fri, Jul 21, 2023 at 02:22:46PM -0600, Daniel Xu wrote: > This commit adds support for enabling IP defrag using pre-existing > netfilter defrag support. Basically all the flag does is bump a refcnt > while the link the active. Checks are also added to ensure the prog > requesting defrag support is run _after_ netfilter defrag hooks. > > We also take care to avoid any issues w.r.t. module unloading -- while > defrag is active on a link, the module is prevented from unloading. > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > --- > include/uapi/linux/bpf.h | 5 ++ > net/netfilter/nf_bpf_link.c | 123 +++++++++++++++++++++++++++++---- > tools/include/uapi/linux/bpf.h | 5 ++ > 3 files changed, 118 insertions(+), 15 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 739c15906a65..12a5480314a2 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -1187,6 +1187,11 @@ enum bpf_perf_event_type { > */ > #define BPF_F_KPROBE_MULTI_RETURN (1U << 0) > > +/* link_create.netfilter.flags used in LINK_CREATE command for > + * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. 
> + */ > +#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0) > + > /* When BPF ldimm64's insn[0].src_reg != 0 then this can have > * the following extensions: > * > diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c > index c36da56d756f..8fe594bbc7e2 100644 > --- a/net/netfilter/nf_bpf_link.c > +++ b/net/netfilter/nf_bpf_link.c > @@ -1,6 +1,8 @@ > // SPDX-License-Identifier: GPL-2.0 > #include <linux/bpf.h> > #include <linux/filter.h> > +#include <linux/kmod.h> > +#include <linux/module.h> > #include <linux/netfilter.h> > > #include <net/netfilter/nf_bpf_link.h> > @@ -23,8 +25,88 @@ struct bpf_nf_link { > struct nf_hook_ops hook_ops; > struct net *net; > u32 dead; > + const struct nf_defrag_hook *defrag_hook; > }; > > +static const struct nf_defrag_hook * > +get_proto_defrag_hook(struct bpf_nf_link *link, > + const struct nf_defrag_hook __rcu *global_hook, > + const char *mod) > +{ > + const struct nf_defrag_hook *hook; > + int err; > + > + /* RCU protects us from races against module unloading */ > + rcu_read_lock(); > + hook = rcu_dereference(global_hook); > + if (!hook) { > + rcu_read_unlock(); > + err = request_module(mod); > + if (err) > + return ERR_PTR(err < 0 ? err : -EINVAL); > + > + rcu_read_lock(); > + hook = rcu_dereference(global_hook); > + } > + > + if (hook && try_module_get(hook->owner)) { > + /* Once we have a refcnt on the module, we no longer need RCU */ > + hook = rcu_pointer_handoff(hook); > + } else { > + WARN_ONCE(!hook, "%s has bad registration", mod); > + hook = ERR_PTR(-ENOENT); > + } > + rcu_read_unlock(); > + > + if (!IS_ERR(hook)) { > + err = hook->enable(link->net); > + if (err) { > + module_put(hook->owner); > + hook = ERR_PTR(err); > + } > + } > + > + return hook; The rcu + module_get logic looks correct to me, but you've dropped all Florian's acks. What's going on? We need explicit acks to merge this through bpf-next.
Hi Alexei, On Thu, Jul 27, 2023 at 06:16:20PM -0700, Alexei Starovoitov wrote: > On Fri, Jul 21, 2023 at 02:22:46PM -0600, Daniel Xu wrote: > > This commit adds support for enabling IP defrag using pre-existing > > netfilter defrag support. Basically all the flag does is bump a refcnt > > while the link the active. Checks are also added to ensure the prog > > requesting defrag support is run _after_ netfilter defrag hooks. > > > > We also take care to avoid any issues w.r.t. module unloading -- while > > defrag is active on a link, the module is prevented from unloading. > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > --- > > include/uapi/linux/bpf.h | 5 ++ > > net/netfilter/nf_bpf_link.c | 123 +++++++++++++++++++++++++++++---- > > tools/include/uapi/linux/bpf.h | 5 ++ > > 3 files changed, 118 insertions(+), 15 deletions(-) > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index 739c15906a65..12a5480314a2 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h > > @@ -1187,6 +1187,11 @@ enum bpf_perf_event_type { > > */ > > #define BPF_F_KPROBE_MULTI_RETURN (1U << 0) > > > > +/* link_create.netfilter.flags used in LINK_CREATE command for > > + * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. 
> > + */ > > +#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0) > > + > > /* When BPF ldimm64's insn[0].src_reg != 0 then this can have > > * the following extensions: > > * > > diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c > > index c36da56d756f..8fe594bbc7e2 100644 > > --- a/net/netfilter/nf_bpf_link.c > > +++ b/net/netfilter/nf_bpf_link.c > > @@ -1,6 +1,8 @@ > > // SPDX-License-Identifier: GPL-2.0 > > #include <linux/bpf.h> > > #include <linux/filter.h> > > +#include <linux/kmod.h> > > +#include <linux/module.h> > > #include <linux/netfilter.h> > > > > #include <net/netfilter/nf_bpf_link.h> > > @@ -23,8 +25,88 @@ struct bpf_nf_link { > > struct nf_hook_ops hook_ops; > > struct net *net; > > u32 dead; > > + const struct nf_defrag_hook *defrag_hook; > > }; > > > > +static const struct nf_defrag_hook * > > +get_proto_defrag_hook(struct bpf_nf_link *link, > > + const struct nf_defrag_hook __rcu *global_hook, > > + const char *mod) > > +{ > > + const struct nf_defrag_hook *hook; > > + int err; > > + > > + /* RCU protects us from races against module unloading */ > > + rcu_read_lock(); > > + hook = rcu_dereference(global_hook); > > + if (!hook) { > > + rcu_read_unlock(); > > + err = request_module(mod); > > + if (err) > > + return ERR_PTR(err < 0 ? err : -EINVAL); > > + > > + rcu_read_lock(); > > + hook = rcu_dereference(global_hook); > > + } > > + > > + if (hook && try_module_get(hook->owner)) { > > + /* Once we have a refcnt on the module, we no longer need RCU */ > > + hook = rcu_pointer_handoff(hook); > > + } else { > > + WARN_ONCE(!hook, "%s has bad registration", mod); > > + hook = ERR_PTR(-ENOENT); > > + } > > + rcu_read_unlock(); > > + > > + if (!IS_ERR(hook)) { > > + err = hook->enable(link->net); > > + if (err) { > > + module_put(hook->owner); > > + hook = ERR_PTR(err); > > + } > > + } > > + > > + return hook; > > The rcu + module_get logic looks correct to me, but you've dropped all Florian's acks. > What's going on? 
> > We need explicit acks to merge this through bpf-next. I understood acked-by tags to be a lighter form of reviewed-by tag. So because the patches changed so much, I dropped the tag. It sounds like I may have misunderstood -- I'll keep it in mind the next time around. Thanks, Daniel
Daniel Xu <dxu@dxuuu.xyz> wrote: > This commit adds support for enabling IP defrag using pre-existing > netfilter defrag support. Basically all the flag does is bump a refcnt > while the link the active. Checks are also added to ensure the prog > requesting defrag support is run _after_ netfilter defrag hooks. > > We also take care to avoid any issues w.r.t. module unloading -- while > defrag is active on a link, the module is prevented from unloading. Reviewed-by: Florian Westphal <fw@strlen.de>
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 739c15906a65..12a5480314a2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1187,6 +1187,11 @@ enum bpf_perf_event_type { */ #define BPF_F_KPROBE_MULTI_RETURN (1U << 0) +/* link_create.netfilter.flags used in LINK_CREATE command for + * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. + */ +#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index c36da56d756f..8fe594bbc7e2 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> #include <linux/filter.h> +#include <linux/kmod.h> +#include <linux/module.h> #include <linux/netfilter.h> #include <net/netfilter/nf_bpf_link.h> @@ -23,8 +25,88 @@ struct bpf_nf_link { struct nf_hook_ops hook_ops; struct net *net; u32 dead; + const struct nf_defrag_hook *defrag_hook; }; +static const struct nf_defrag_hook * +get_proto_defrag_hook(struct bpf_nf_link *link, + const struct nf_defrag_hook __rcu *global_hook, + const char *mod) +{ + const struct nf_defrag_hook *hook; + int err; + + /* RCU protects us from races against module unloading */ + rcu_read_lock(); + hook = rcu_dereference(global_hook); + if (!hook) { + rcu_read_unlock(); + err = request_module(mod); + if (err) + return ERR_PTR(err < 0 ? 
err : -EINVAL); + + rcu_read_lock(); + hook = rcu_dereference(global_hook); + } + + if (hook && try_module_get(hook->owner)) { + /* Once we have a refcnt on the module, we no longer need RCU */ + hook = rcu_pointer_handoff(hook); + } else { + WARN_ONCE(!hook, "%s has bad registration", mod); + hook = ERR_PTR(-ENOENT); + } + rcu_read_unlock(); + + if (!IS_ERR(hook)) { + err = hook->enable(link->net); + if (err) { + module_put(hook->owner); + hook = ERR_PTR(err); + } + } + + return hook; +} + +static int bpf_nf_enable_defrag(struct bpf_nf_link *link) +{ + const struct nf_defrag_hook __maybe_unused *hook; + + switch (link->hook_ops.pf) { +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) + case NFPROTO_IPV4: + hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4"); + if (IS_ERR(hook)) + return PTR_ERR(hook); + + link->defrag_hook = hook; + return 0; +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + case NFPROTO_IPV6: + hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6"); + if (IS_ERR(hook)) + return PTR_ERR(hook); + + link->defrag_hook = hook; + return 0; +#endif + default: + return -EAFNOSUPPORT; + } +} + +static void bpf_nf_disable_defrag(struct bpf_nf_link *link) +{ + const struct nf_defrag_hook *hook = link->defrag_hook; + + if (!hook) + return; + hook->disable(link->net); + module_put(hook->owner); +} + static void bpf_nf_link_release(struct bpf_link *link) { struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); @@ -32,11 +114,11 @@ static void bpf_nf_link_release(struct bpf_link *link) if (nf_link->dead) return; - /* prevent hook-not-found warning splat from netfilter core when - * .detach was already called - */ - if (!cmpxchg(&nf_link->dead, 0, 1)) + /* do not double release in case .detach was already called */ + if (!cmpxchg(&nf_link->dead, 0, 1)) { nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops); + bpf_nf_disable_defrag(nf_link); + } } static void bpf_nf_link_dealloc(struct bpf_link *link) @@ -92,6 
+174,8 @@ static const struct bpf_link_ops bpf_nf_link_lops = { static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr) { + int prio; + switch (attr->link_create.netfilter.pf) { case NFPROTO_IPV4: case NFPROTO_IPV6: @@ -102,19 +186,18 @@ static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr) return -EAFNOSUPPORT; } - if (attr->link_create.netfilter.flags) + if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG) return -EOPNOTSUPP; - /* make sure conntrack confirm is always last. - * - * In the future, if userspace can e.g. request defrag, then - * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG" - * should fail. - */ - switch (attr->link_create.netfilter.priority) { - case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */ - case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */ - } + /* make sure conntrack confirm is always last */ + prio = attr->link_create.netfilter.priority; + if (prio == NF_IP_PRI_FIRST) + return -ERANGE; /* sabotage_in and other warts */ + else if (prio == NF_IP_PRI_LAST) + return -ERANGE; /* e.g. 
conntrack confirm */ + else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) && + prio <= NF_IP_PRI_CONNTRACK_DEFRAG) + return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */ return 0; } @@ -149,6 +232,7 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) link->net = net; link->dead = false; + link->defrag_hook = NULL; err = bpf_link_prime(&link->link, &link_primer); if (err) { @@ -156,8 +240,17 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) return err; } + if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) { + err = bpf_nf_enable_defrag(link); + if (err) { + bpf_link_cleanup(&link_primer); + return err; + } + } + err = nf_register_net_hook(net, &link->hook_ops); if (err) { + bpf_nf_disable_defrag(link); bpf_link_cleanup(&link_primer); return err; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 739c15906a65..12a5480314a2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1187,6 +1187,11 @@ enum bpf_perf_event_type { */ #define BPF_F_KPROBE_MULTI_RETURN (1U << 0) +/* link_create.netfilter.flags used in LINK_CREATE command for + * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. + */ +#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: *
This commit adds support for enabling IP defrag using pre-existing netfilter defrag support. Basically all the flag does is bump a refcnt while the link is active. Checks are also added to ensure the prog requesting defrag support is run _after_ netfilter defrag hooks. We also take care to avoid any issues w.r.t. module unloading -- while defrag is active on a link, the module is prevented from unloading. Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> --- include/uapi/linux/bpf.h | 5 ++ net/netfilter/nf_bpf_link.c | 123 +++++++++++++++++++++++++++++---- tools/include/uapi/linux/bpf.h | 5 ++ 3 files changed, 118 insertions(+), 15 deletions(-)