@@ -2,6 +2,7 @@
#ifndef __LINUX_NETFILTER_H
#define __LINUX_NETFILTER_H
+#include <linux/filter.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/net.h>
@@ -106,6 +107,9 @@ struct nf_hook_entries_rcu_head {
};
struct nf_hook_entries {
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+ struct bpf_prog *hook_prog;
+#endif
u16 num_hook_entries;
/* padding */
struct nf_hook_entry hooks[];
@@ -205,6 +209,17 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
const struct nf_hook_entries *e);
+
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+DECLARE_BPF_DISPATCHER(nf_hook_base);
+
+static __always_inline int bpf_prog_run_nf(const struct bpf_prog *prog,
+ struct nf_hook_state *state)
+{
+ return __bpf_prog_run(prog, state, BPF_DISPATCHER_FUNC(nf_hook_base));
+}
+#endif
+
/**
* nf_hook - call a netfilter hook
*
@@ -259,11 +274,24 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
if (hook_head) {
struct nf_hook_state state;
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+ const struct bpf_prog *p = READ_ONCE(hook_head->hook_prog);
+
+ nf_hook_state_init(&state, hook, pf, indev, outdev,
+ sk, net, okfn);
+
+ state.priv = (void *)hook_head;
+ state.skb = skb;
+ migrate_disable();
+ ret = bpf_prog_run_nf(p, &state);
+ migrate_enable();
+#else
nf_hook_state_init(&state, hook, pf, indev, outdev,
sk, net, okfn);
ret = nf_hook_slow(skb, &state, hook_head);
+#endif
}
rcu_read_unlock();
@@ -341,10 +369,38 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
if (hook_head) {
struct nf_hook_state state;
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+ const struct bpf_prog *p = hook_head->hook_prog;
+ struct sk_buff *skb, *next;
+ struct list_head sublist;
+ int ret;
nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn);
+ INIT_LIST_HEAD(&sublist);
+
+ migrate_disable();
+
+ list_for_each_entry_safe(skb, next, head, list) {
+ skb_list_del_init(skb);
+
+ state.priv = (void *)hook_head;
+ state.skb = skb;
+
+ ret = bpf_prog_run_nf(p, &state);
+ if (ret == 1)
+ list_add_tail(&skb->list, &sublist);
+ }
+
+ migrate_enable();
+
+ /* Put passed packets back on main list */
+ list_splice(&sublist, head);
+#else
+ nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn);
+
nf_hook_slow_list(head, &state, hook_head);
+#endif
}
rcu_read_unlock();
}
new file mode 100644
@@ -0,0 +1,14 @@
+struct bpf_dispatcher;
+struct bpf_prog;
+
+struct bpf_prog *nf_hook_bpf_create(const struct nf_hook_entries *n);
+struct bpf_prog *nf_hook_bpf_create_fb(void);
+
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+void nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to);
+#else
+static inline void
+nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *f, struct bpf_prog *t)
+{
+}
+#endif
@@ -19,6 +19,16 @@ config NETFILTER_FAMILY_BRIDGE
config NETFILTER_FAMILY_ARP
bool
+config HAVE_NF_HOOK_BPF
+ bool
+
+config NF_HOOK_BPF
+ bool "netfilter base hook bpf translator"
+ depends on BPF_JIT
+ help
+ This partially unrolls the nf_hook_slow() interpreter loop into
+ auto-generated BPF programs that replace the indirect hook calls.
+
config NETFILTER_NETLINK_HOOK
tristate "Netfilter base hook dump support"
depends on NETFILTER_ADVANCED
@@ -16,6 +16,7 @@ nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
obj-$(CONFIG_NETFILTER) = netfilter.o
+obj-$(CONFIG_NF_HOOK_BPF) += nf_hook_bpf.o
obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
@@ -24,6 +24,7 @@
#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/netfilter/nf_queue.h>
+#include <net/netfilter/nf_hook_bpf.h>
#include <net/sock.h>
#include "nf_internals.h"
@@ -47,6 +48,12 @@ static DEFINE_MUTEX(nf_hook_mutex);
#define nf_entry_dereference(e) \
rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+DEFINE_BPF_DISPATCHER(nf_hook_base);
+
+static struct bpf_prog *fallback_nf_hook_slow;
+#endif
+
static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
{
struct nf_hook_entries *e;
@@ -58,9 +65,25 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
if (num == 0)
return NULL;
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+ if (!fallback_nf_hook_slow) {
+ /* never free'd */
+ fallback_nf_hook_slow = nf_hook_bpf_create_fb();
+
+ if (!fallback_nf_hook_slow)
+ return NULL;
+ }
+#endif
+
e = kvzalloc(alloc, GFP_KERNEL);
- if (e)
- e->num_hook_entries = num;
+ if (!e)
+ return NULL;
+
+ e->num_hook_entries = num;
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+ e->hook_prog = fallback_nf_hook_slow;
+#endif
+
return e;
}
@@ -104,6 +127,7 @@ nf_hook_entries_grow(const struct nf_hook_entries *old,
{
unsigned int i, alloc_entries, nhooks, old_entries;
struct nf_hook_ops **orig_ops = NULL;
+ struct bpf_prog *hook_bpf_prog;
struct nf_hook_ops **new_ops;
struct nf_hook_entries *new;
bool inserted = false;
@@ -156,6 +180,27 @@ nf_hook_entries_grow(const struct nf_hook_entries *old,
new->hooks[nhooks].priv = reg->priv;
}
+ hook_bpf_prog = nf_hook_bpf_create(new);
+
+ /* XXX: jit failure handling?
+ * We could refuse hook registration.
+ *
+ * For now, allocate_hook_entries_size() sets
+ * ->hook_prog to a small fallback program that
+ * calls nf_hook_slow().
+ */
+ if (hook_bpf_prog) {
+ struct bpf_prog *old_prog = NULL;
+
+ new->hook_prog = hook_bpf_prog;
+
+ if (old)
+ old_prog = old->hook_prog;
+
+ nf_hook_bpf_change_prog(BPF_DISPATCHER_PTR(nf_hook_base),
+ old_prog, hook_bpf_prog);
+ }
+
return new;
}
@@ -221,6 +266,7 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
struct nf_hook_entries __rcu **pp)
{
unsigned int i, j, skip = 0, hook_entries;
+ struct bpf_prog *hook_bpf_prog = NULL;
struct nf_hook_entries *new = NULL;
struct nf_hook_ops **orig_ops;
struct nf_hook_ops **new_ops;
@@ -244,8 +290,15 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
hook_entries -= skip;
new = allocate_hook_entries_size(hook_entries);
- if (!new)
+ if (!new) {
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+ struct bpf_prog *old_prog = old->hook_prog;
+
+ WRITE_ONCE(old->hook_prog, fallback_nf_hook_slow);
+ nf_hook_bpf_change_prog(BPF_DISPATCHER_PTR(nf_hook_base), old_prog, NULL);
+#endif
return NULL;
+ }
new_ops = nf_hook_entries_get_hook_ops(new);
for (i = 0, j = 0; i < old->num_hook_entries; i++) {
@@ -256,7 +309,16 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
j++;
}
hooks_validate(new);
+
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+ /* if this fails, the fallback prog calls nf_hook_slow. */
+ hook_bpf_prog = nf_hook_bpf_create(new);
+ if (hook_bpf_prog)
+ new->hook_prog = hook_bpf_prog;
+#endif
out_assign:
+ nf_hook_bpf_change_prog(BPF_DISPATCHER_PTR(nf_hook_base),
+ old ? old->hook_prog : NULL, hook_bpf_prog);
rcu_assign_pointer(*pp, new);
return old;
}
@@ -584,6 +646,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
int ret;
state->skb = skb;
+
for (; s < e->num_hook_entries; s++) {
verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
switch (verdict & NF_VERDICT_MASK) {
@@ -764,6 +827,11 @@ int __init netfilter_init(void)
if (ret < 0)
goto err_pernet;
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+ fallback_nf_hook_slow = nf_hook_bpf_create_fb();
+ WARN_ON_ONCE(!fallback_nf_hook_slow);
+#endif
+
return 0;
err_pernet:
unregister_pernet_subsys(&netfilter_net_ops);
new file mode 100644
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/string.h>
+#include <linux/hashtable.h>
+#include <linux/jhash.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_queue.h>
+
+/* BPF translator for netfilter hooks.
+ *
+ * Copyright (c) 2021 Red Hat GmbH
+ *
+ * Author: Florian Westphal <fw@strlen.de>
+ *
+ * Unroll the nf_hook_slow interpreter loop into an equivalent bpf
+ * program that can be called *instead* of nf_hook_slow().
+ * This program thus has the same return value semantics as
+ * nf_hook_slow() and handles nfqueue and packet drops internally.
+ *
+ * These bpf programs are called/run from nf_hook() inline function.
+ *
+ * Register usage is:
+ *
+ * BPF_REG_0: verdict.
+ * BPF_REG_1: struct nf_hook_state *
+ * BPF_REG_2: reserved as arg to nf_queue()
+ * BPF_REG_3: reserved as arg to nf_queue()
+ *
+ * Prologue storage:
+ * BPF_REG_6: copy of REG_1 (original struct nf_hook_state *)
+ * BPF_REG_7: copy of original state->priv value
+ * BPF_REG_8: hook_index. Initialized to 0, incremented on each hook call.
+ */
+
+#define JMP_INVALID 0
+#define JIT_SIZE_MAX 0xffff
+
+struct nf_hook_prog {
+ struct bpf_insn *insns;
+ unsigned int pos;
+};
+
+static bool emit(struct nf_hook_prog *p, struct bpf_insn insn)
+{
+ if (WARN_ON_ONCE(p->pos >= BPF_MAXINSNS))
+ return false;
+
+ p->insns[p->pos] = insn;
+ p->pos++;
+ return true;
+}
+
+static bool xlate_one_hook(struct nf_hook_prog *p,
+ const struct nf_hook_entries *e,
+ const struct nf_hook_entry *h)
+{
+ int width = bytes_to_bpf_size(sizeof(h->priv));
+
+ /* if priv is NULL, the called hookfn does not use the priv member. */
+ if (!h->priv)
+ goto emit_hook_call;
+
+ if (WARN_ON_ONCE(width < 0))
+ return false;
+
+ /* x = entries[s]->priv; */
+ if (!emit(p, BPF_LDX_MEM(width, BPF_REG_2, BPF_REG_7,
+ (unsigned long)&h->priv - (unsigned long)e)))
+ return false;
+
+ /* state->priv = x */
+ if (!emit(p, BPF_STX_MEM(width, BPF_REG_6, BPF_REG_2,
+ offsetof(struct nf_hook_state, priv))))
+ return false;
+
+emit_hook_call:
+ if (!emit(p, BPF_EMIT_CALL(h->hook)))
+ return false;
+
+ /* Only advance to next hook on ACCEPT verdict.
+ * Else, skip rest and move to tail.
+ *
+ * Postprocessing patches the jump offset to the
+ * correct position, after last hook.
+ */
+ if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, NF_ACCEPT, JMP_INVALID)))
+ return false;
+
+ return true;
+}
+
+static bool emit_mov_ptr_reg(struct nf_hook_prog *p, u8 dreg, u8 sreg)
+{
+ if (sizeof(void *) == sizeof(u64))
+ return emit(p, BPF_MOV64_REG(dreg, sreg));
+ if (sizeof(void *) == sizeof(u32))
+ return emit(p, BPF_MOV32_REG(dreg, sreg));
+
+ return false;
+}
+
+static bool do_prologue(struct nf_hook_prog *p)
+{
+ int width = bytes_to_bpf_size(sizeof(void *));
+
+ if (WARN_ON_ONCE(width < 0))
+ return false;
+
+ /* argument to program is a pointer to struct nf_hook_state, in BPF_REG_1. */
+ if (!emit_mov_ptr_reg(p, BPF_REG_6, BPF_REG_1))
+ return false;
+
+ if (!emit(p, BPF_LDX_MEM(width, BPF_REG_7, BPF_REG_1,
+ offsetof(struct nf_hook_state, priv))))
+ return false;
+
+ /* Could load state->hook_index here, but we don't support index > 0 for bpf call. */
+ if (!emit(p, BPF_MOV32_IMM(BPF_REG_8, 0)))
+ return false;
+
+ return true;
+}
+
+static void patch_hook_jumps(struct nf_hook_prog *p)
+{
+ unsigned int i;
+
+ if (!p->insns)
+ return;
+
+ for (i = 0; i < p->pos; i++) {
+ if (BPF_CLASS(p->insns[i].code) != BPF_JMP)
+ continue;
+
+ if (p->insns[i].code == (BPF_EXIT | BPF_JMP))
+ continue;
+ if (p->insns[i].code == (BPF_CALL | BPF_JMP))
+ continue;
+
+ if (p->insns[i].off != JMP_INVALID)
+ continue;
+ p->insns[i].off = p->pos - i - 1;
+ }
+}
+
+static bool emit_retval(struct nf_hook_prog *p, int retval)
+{
+ if (!emit(p, BPF_MOV32_IMM(BPF_REG_0, retval)))
+ return false;
+
+ return emit(p, BPF_EXIT_INSN());
+}
+
+static bool emit_nf_hook_slow(struct nf_hook_prog *p)
+{
+ int width = bytes_to_bpf_size(sizeof(void *));
+
+ /* restore the original state->priv. */
+ if (!emit(p, BPF_STX_MEM(width, BPF_REG_6, BPF_REG_7,
+ offsetof(struct nf_hook_state, priv))))
+ return false;
+
+ /* arg1 is state->skb */
+ if (!emit(p, BPF_LDX_MEM(width, BPF_REG_1, BPF_REG_6,
+ offsetof(struct nf_hook_state, skb))))
+ return false;
+
+ /* arg2 is "struct nf_hook_state *" */
+ if (!emit(p, BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)))
+ return false;
+
+ /* arg3 is nf_hook_entries (original state->priv) */
+ if (!emit(p, BPF_MOV64_REG(BPF_REG_3, BPF_REG_7)))
+ return false;
+
+ if (!emit(p, BPF_EMIT_CALL(nf_hook_slow)))
+ return false;
+
+ /* No further action needed, return retval provided by nf_hook_slow */
+ return emit(p, BPF_EXIT_INSN());
+}
+
+static bool emit_nf_queue(struct nf_hook_prog *p)
+{
+ int width = bytes_to_bpf_size(sizeof(void *));
+
+ if (width < 0) {
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ /* int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, unsigned int verdict) */
+ if (!emit(p, BPF_LDX_MEM(width, BPF_REG_1, BPF_REG_6, offsetof(struct nf_hook_state, skb))))
+ return false;
+ if (!emit(p, BPF_STX_MEM(BPF_H, BPF_REG_6, BPF_REG_8,
+ offsetof(struct nf_hook_state, hook_index))))
+ return false;
+ /* arg2: struct nf_hook_state * */
+ if (!emit(p, BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)))
+ return false;
+ /* arg3: original hook return value: (NUM << NF_VERDICT_QBITS | NF_QUEUE) */
+ if (!emit(p, BPF_MOV32_REG(BPF_REG_3, BPF_REG_0)))
+ return false;
+ if (!emit(p, BPF_EMIT_CALL(nf_queue)))
+ return false;
+
+ /* Check nf_queue return value. Abnormal case: nf_queue returned != 0.
+ *
+ * Fall back to nf_hook_slow().
+ */
+ if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2)))
+ return false;
+
+ /* Normal case: skb was stolen. Return 0. */
+ return emit_retval(p, 0);
+}
+
+static bool do_epilogue_base_hooks(struct nf_hook_prog *p)
+{
+ int width = bytes_to_bpf_size(sizeof(void *));
+
+ if (WARN_ON_ONCE(width < 0))
+ return false;
+
+ /* last 'hook'. We arrive here if previous hook returned ACCEPT,
+ * i.e. all hooks passed -- we are done.
+ *
+ * Return 1, skb can continue traversing network stack.
+ */
+ if (!emit_retval(p, 1))
+ return false;
+
+ /* Patch all hook jumps, in case any of these are taken
+ * we need to jump to this location.
+ *
+ * This happens when verdict is != ACCEPT.
+ */
+ patch_hook_jumps(p);
+
+ /* need to ignore upper 24 bits, might contain errno or queue number */
+ if (!emit(p, BPF_MOV32_REG(BPF_REG_3, BPF_REG_0)))
+ return false;
+ if (!emit(p, BPF_ALU32_IMM(BPF_AND, BPF_REG_3, 0xff)))
+ return false;
+
+ /* ACCEPT handled, check STOLEN. */
+ if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_3, NF_STOLEN, 2)))
+ return false;
+
+ if (!emit_retval(p, 0))
+ return false;
+
+ /* ACCEPT and STOLEN handled. Check DROP next */
+ if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_3, NF_DROP, 1 + 2 + 2 + 2 + 2)))
+ return false;
+
+ /* First step. Extract the errno number. 1 insn. */
+ if (!emit(p, BPF_ALU32_IMM(BPF_RSH, BPF_REG_0, NF_VERDICT_QBITS)))
+ return false;
+
+ /* Second step: replace errno with EPERM if it was 0. 2 insns. */
+ if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1)))
+ return false;
+ if (!emit(p, BPF_MOV32_IMM(BPF_REG_0, EPERM)))
+ return false;
+
+ /* Third step: negate reg0 (caller expects -EFOO) and stash the result. 2 insns. */
+ if (!emit(p, BPF_ALU32_IMM(BPF_NEG, BPF_REG_0, 0)))
+ return false;
+ if (!emit(p, BPF_MOV32_REG(BPF_REG_8, BPF_REG_0)))
+ return false;
+
+ /* Fourth step: free the skb. 2 insns. */
+ if (!emit(p, BPF_LDX_MEM(width, BPF_REG_1, BPF_REG_6, offsetof(struct nf_hook_state, skb))))
+ return false;
+ if (!emit(p, BPF_EMIT_CALL(kfree_skb)))
+ return false;
+
+ /* Last step: return. 2 insns. */
+ if (!emit(p, BPF_MOV32_REG(BPF_REG_0, BPF_REG_8)))
+ return false;
+ if (!emit(p, BPF_EXIT_INSN()))
+ return false;
+
+ /* ACCEPT, STOLEN and DROP have been handled.
+ * REPEAT and STOP are no longer allowed for individual hook functions.
+ * This leaves NF_QUEUE as the only remaining return value.
+ *
+ * In this case BPF_REG_0 still contains the original verdict of
+ * '(NUM << NF_VERDICT_QBITS | NF_QUEUE)', so pass it to nf_queue() as-is.
+ */
+ if (!emit_nf_queue(p))
+ return false;
+
+ /* Increment hook index and store it in nf_hook_state so nf_hook_slow will
+ * start at the next hook, if any.
+ */
+ if (!emit(p, BPF_ALU32_IMM(BPF_ADD, BPF_REG_8, 1)))
+ return false;
+ if (!emit(p, BPF_STX_MEM(BPF_H, BPF_REG_6, BPF_REG_8,
+ offsetof(struct nf_hook_state, hook_index))))
+ return false;
+
+ return emit_nf_hook_slow(p);
+}
+
+static int nf_hook_prog_init(struct nf_hook_prog *p)
+{
+ memset(p, 0, sizeof(*p));
+
+ p->insns = kcalloc(BPF_MAXINSNS, sizeof(*p->insns), GFP_KERNEL);
+ if (!p->insns)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void nf_hook_prog_free(struct nf_hook_prog *p)
+{
+ kfree(p->insns);
+}
+
+static int xlate_base_hooks(struct nf_hook_prog *p, const struct nf_hook_entries *e)
+{
+ unsigned int i, len;
+
+ len = e->num_hook_entries;
+
+ if (!do_prologue(p))
+ goto out;
+
+ for (i = 0; i < len; i++) {
+ if (!xlate_one_hook(p, e, &e->hooks[i]))
+ goto out;
+
+ if (i + 1 < len) {
+ if (!emit(p, BPF_MOV64_REG(BPF_REG_1, BPF_REG_6)))
+ goto out;
+
+ if (!emit(p, BPF_ALU32_IMM(BPF_ADD, BPF_REG_8, 1)))
+ goto out;
+ }
+ }
+
+ if (!do_epilogue_base_hooks(p))
+ goto out;
+
+ return 0;
+out:
+ return -EINVAL;
+}
+
+static struct bpf_prog *nf_hook_jit_compile(struct bpf_insn *insns, unsigned int len)
+{
+ struct bpf_prog *prog;
+ int err = 0;
+
+ prog = bpf_prog_alloc(bpf_prog_size(len), 0);
+ if (!prog)
+ return NULL;
+
+ prog->len = len;
+ prog->type = BPF_PROG_TYPE_SOCKET_FILTER;
+ memcpy(prog->insnsi, insns, prog->len * sizeof(struct bpf_insn));
+
+ prog = bpf_prog_select_runtime(prog, &err);
+ if (err) {
+ bpf_prog_free(prog);
+ return NULL;
+ }
+
+ return prog;
+}
+
+/* fallback program, invokes the nf_hook_slow interpreter.
+ *
+ * Used when a hook is unregistered and a new program cannot
+ * be compiled for some reason.
+ */
+struct bpf_prog *nf_hook_bpf_create_fb(void)
+{
+ struct bpf_prog *prog = NULL;
+ struct nf_hook_prog p;
+ int err;
+
+ err = nf_hook_prog_init(&p);
+ if (err)
+ return NULL;
+
+ if (!do_prologue(&p))
+ goto err;
+
+ if (!emit_nf_hook_slow(&p))
+ goto err;
+
+ prog = nf_hook_jit_compile(p.insns, p.pos);
+err:
+ nf_hook_prog_free(&p);
+ return prog;
+}
+
+struct bpf_prog *nf_hook_bpf_create(const struct nf_hook_entries *new)
+{
+ struct bpf_prog *prog = NULL;
+ struct nf_hook_prog p;
+ int err;
+
+ err = nf_hook_prog_init(&p);
+ if (err)
+ return NULL;
+
+ err = xlate_base_hooks(&p, new);
+ if (err)
+ goto err;
+
+ prog = nf_hook_jit_compile(p.insns, p.pos);
+err:
+ nf_hook_prog_free(&p);
+ return prog;
+}
+
+void nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to)
+{
+ bpf_dispatcher_change_prog(d, from, to);
+}
Add a kernel bpf program generator for netfilter base hooks.

Currently netfilter hooks are invoked by nf_hook_slow:

  for i in hooks; do
    verdict = hooks[i]->indirect_func(hooks[i].hook_arg, skb, state);

    switch (verdict) { ....

The autogenerator unrolls the loop, so we get:

  state->priv = hooks[0].hook_arg;
  v = first_hook_function(state);
  if (v != ACCEPT) goto done;
  state->priv = hooks[1].hook_arg;
  v = second_hook_function(state); ...

Indirections are replaced by direct calls.

Invocation of the autogenerated programs is done via the bpf dispatcher
from nf_hook().

The autogenerated program has the same return value scheme as
nf_hook_slow().  NF_HOOK() points are converted to call the
autogenerated bpf program instead of nf_hook_slow().

Purpose of this is to eventually add a 'netfilter prog type' to bpf and
permit attachment of (userspace generated) bpf programs to the
netfilter machinery, e.g. 'attach bpf prog id 1234 to ipv6 PREROUTING
at prio -300'.

This will require exposing the context structure (the program argument,
'__nf_hook_state'), with accesses rewritten to match the nf_hook_state
layout.

TODO:
 1. Test !x86_64.
 2. Test bridge family.

Future work:

Add support for NAT hooks; they still use indirect calls, but those are
less of a problem because they get called only once per connection.

Could annotate the ops struct with the kinds of verdicts the C function
can return.  This would allow eliding the retval check when a hook can
only return NF_ACCEPT.

Could add extra support for the INGRESS hook to move more code from
inline functions into the autogenerated program.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/linux/netfilter.h           |  56 ++++
 include/net/netfilter/nf_hook_bpf.h |  14 +
 net/netfilter/Kconfig               |  10 +
 net/netfilter/Makefile              |   1 +
 net/netfilter/core.c                |  74 ++++-
 net/netfilter/nf_hook_bpf.c         | 425 ++++++++++++++++++++++++++++
 6 files changed, 577 insertions(+), 3 deletions(-)
 create mode 100644 include/net/netfilter/nf_hook_bpf.h
 create mode 100644 net/netfilter/nf_hook_bpf.c
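
For illustration, the program generated for a chain of two hooks is
roughly equivalent to the following C sketch.  It is not part of the
patch: the hook function names are placeholders, the hook functions are
assumed to take the nf_hook_state pointer as their only argument
(matching the register setup in xlate_one_hook()), the hook_index
bookkeeping used by the fallback paths is omitted, and the nf_queue()
and nf_hook_slow() prototypes are the ones used elsewhere in this patch.

#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <net/netfilter/nf_queue.h>

/* placeholders for the real hook functions found in the rule chain */
static unsigned int first_hook_fn(struct nf_hook_state *state);
static unsigned int second_hook_fn(struct nf_hook_state *state);

/* Illustration only: C rendering of the BPF program that
 * nf_hook_bpf_create() emits (as raw BPF insns) for two hooks.
 */
static int generated_prog_sketch(struct nf_hook_state *state)
{
	const struct nf_hook_entries *e = state->priv;	/* stashed by the prologue */
	unsigned int verdict;

	state->priv = e->hooks[0].priv;
	verdict = first_hook_fn(state);		/* direct call, no indirection */
	if (verdict != NF_ACCEPT)
		goto nonaccept;

	state->priv = e->hooks[1].priv;
	verdict = second_hook_fn(state);
	if (verdict != NF_ACCEPT)
		goto nonaccept;

	return 1;	/* all hooks accepted, skb continues through the stack */

nonaccept:
	switch (verdict & NF_VERDICT_MASK) {
	case NF_STOLEN:
		return 0;
	case NF_DROP: {
		int err = verdict >> NF_VERDICT_QBITS;

		kfree_skb(state->skb);
		return err ? -err : -EPERM;
	}
	default:	/* NF_QUEUE */
		if (nf_queue(state->skb, state, verdict) == 0)
			return 0;	/* skb now owned by the queue handler */

		/* queueing failed: restore priv and resume in the interpreter */
		state->priv = (void *)e;
		return nf_hook_slow(state->skb, state, e);
	}
}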