diff mbox series

[bpf-next,v3,12/16] bpfilter: add table structure

Message ID 20221224000402.476079-13-qde@naccy.de (mailing list archive)
State New
Headers show
Series bpfilter | expand

Commit Message

Quentin Deslandes Dec. 24, 2022, 12:03 a.m. UTC
The table_ops structure describes a set of operations for an individual
table type.

Tables support the following set of operations:
- create: create an instance of a table from an ipt_replace blob.
- codegen: generate eBPF bytecode for a table.
- install: load BPF maps, progs, and attach them.
- uninstall: detach loaded BPF maps and progs, and unload them.
- free: free all resources used by a table.

Each table keeps an instance of iptables' table blob and an array of
rules for this blob. The array of rules provides a more convenient way
to interact with the blob's entries, while having a copy of the blob
will ease communication with iptables.

All tables created are stored in a map, used for lookups. Also, all
tables are linked into a list to ease cleanup.

Co-developed-by: Dmitrii Banshchikov <me@ubique.spb.ru>
Signed-off-by: Dmitrii Banshchikov <me@ubique.spb.ru>
Signed-off-by: Quentin Deslandes <qde@naccy.de>
---
 net/bpfilter/Makefile                         |   2 +-
 net/bpfilter/context.c                        |  64 +++
 net/bpfilter/context.h                        |   4 +
 net/bpfilter/table.c                          | 391 ++++++++++++++++++
 net/bpfilter/table.h                          |  59 +++
 tools/testing/selftests/bpf/bpfilter/Makefile |   2 +-
 6 files changed, 520 insertions(+), 2 deletions(-)
 create mode 100644 net/bpfilter/table.c
 create mode 100644 net/bpfilter/table.h
diff mbox series

Patch

diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index 759fb6c847d1..9f5b46c70a41 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -13,7 +13,7 @@  $(LIBBPF_A):
 userprogs := bpfilter_umh
 bpfilter_umh-objs := main.o logger.o map-common.o
 bpfilter_umh-objs += context.o codegen.o
-bpfilter_umh-objs += match.o xt_udp.o target.o rule.o
+bpfilter_umh-objs += match.o xt_udp.o target.o rule.o table.o
 bpfilter_umh-userldlibs := $(LIBBPF_A) -lelf -lz
 userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi
 
diff --git a/net/bpfilter/context.c b/net/bpfilter/context.c
index ac07b678baa7..81c9751a2a2d 100644
--- a/net/bpfilter/context.c
+++ b/net/bpfilter/context.c
@@ -9,6 +9,7 @@ 
 #include "context.h"
 
 #include <linux/kernel.h>
+#include <linux/list.h>
 
 #include <string.h>
 
@@ -72,6 +73,39 @@  static int init_target_ops_map(struct context *ctx)
 	return 0;
 }
 
+static const struct table_ops *table_ops[] = {};
+
+static int init_table_ops_map(struct context *ctx)
+{
+	int r;
+
+	r = create_map(&ctx->table_ops_map, ARRAY_SIZE(table_ops));
+	if (r) {
+		BFLOG_ERR("failed to create tables map: %s", STRERR(r));
+		return r;
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(table_ops); ++i) {
+		const struct table_ops *t = table_ops[i];
+
+		r = map_upsert(&ctx->table_ops_map, t->name, (void *)t);
+		if (r) {
+			BFLOG_ERR("failed to upsert in tables map: %s",
+				  STRERR(r));
+			return r;
+		}
+	}
+
+	return 0;
+}
+
+static int init_table_index(struct context *ctx)
+{
+	INIT_LIST_HEAD(&ctx->table_index.list);
+
+	return create_map(&ctx->table_index.map, ARRAY_SIZE(table_ops));
+}
+
 int create_context(struct context *ctx)
 {
 	int r;
@@ -88,8 +122,26 @@  int create_context(struct context *ctx)
 		goto err_free_match_ops_map;
 	}
 
+	r = init_table_ops_map(ctx);
+	if (r) {
+		BFLOG_ERR("failed to initialize tables map: %s", STRERR(r));
+		goto err_free_target_ops_map;
+	}
+
+	r = init_table_index(ctx);
+	if (r) {
+		BFLOG_ERR("failed to initialize tables index: %s", STRERR(r));
+		goto err_free_table_ops_map;
+	}
+
 	return 0;
 
+err_free_table_ops_map:
+	free_map(&ctx->table_ops_map);
+
+err_free_target_ops_map:
+	free_map(&ctx->target_ops_map);
+
 err_free_match_ops_map:
 	free_map(&ctx->match_ops_map);
 
@@ -98,6 +150,18 @@  int create_context(struct context *ctx)
 
 void free_context(struct context *ctx)
 {
+	struct list_head *t;
+	struct list_head *n;
+
+	list_for_each_safe(t, n, &ctx->table_index.list) {
+		struct table *table;
+
+		table = list_entry(t, struct table, list);
+		table->table_ops->uninstall(ctx, table);
+		table->table_ops->free(table);
+	}
+	free_map(&ctx->table_index.map);
+	free_map(&ctx->table_ops_map);
 	free_map(&ctx->target_ops_map);
 	free_map(&ctx->match_ops_map);
 }
diff --git a/net/bpfilter/context.h b/net/bpfilter/context.h
index f9c34a9968b8..b0e91e37d057 100644
--- a/net/bpfilter/context.h
+++ b/net/bpfilter/context.h
@@ -9,9 +9,13 @@ 
 
 #include <search.h>
 
+#include "table.h"
+
 struct context {
 	struct hsearch_data match_ops_map;
 	struct hsearch_data target_ops_map;
+	struct hsearch_data table_ops_map;
+	struct table_index table_index;
 };
 
 int create_context(struct context *ctx);
diff --git a/net/bpfilter/table.c b/net/bpfilter/table.c
new file mode 100644
index 000000000000..4094c82c31de
--- /dev/null
+++ b/net/bpfilter/table.c
@@ -0,0 +1,391 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Telegram FZ-LLC
+ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
+ */
+
+#define _GNU_SOURCE
+
+#include "table.h"
+
+#include <linux/err.h>
+#include <linux/list.h>
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "context.h"
+#include "logger.h"
+#include "rule.h"
+
+static int rule_offset_comparator(const void *x, const void *y)
+{
+	const struct rule *rule = y;
+
+	return x - (const void *)rule->ipt_entry;
+}
+
+static bool table_has_hook(const struct table *table, uint32_t hook)
+{
+	BUG_ON(hook >= BPFILTER_INET_HOOK_MAX);
+
+	return table->valid_hooks & (1 << hook);
+}
+
+static int table_init_rules(struct context *ctx, struct table *table,
+			    const struct bpfilter_ipt_replace *ipt_replace)
+{
+	uint32_t offset;
+
+	table->entries = malloc(table->size);
+	if (!table->entries) {
+		BFLOG_ERR("out of memory");
+		return -ENOMEM;
+	}
+
+	memcpy(table->entries, ipt_replace->entries, table->size);
+
+	table->rules = calloc(table->num_rules, sizeof(table->rules[0]));
+	if (!table->rules) {
+		BFLOG_ERR("out of memory");
+		return -ENOMEM;
+	}
+
+	offset = 0;
+	for (int i = 0; i < table->num_rules; ++i) {
+		const struct bpfilter_ipt_entry *ipt_entry;
+		int r;
+
+		if (table->size < offset + sizeof(*ipt_entry)) {
+			BFLOG_ERR("invalid table size: %d", table->size);
+			return -EINVAL;
+		}
+
+		ipt_entry = table->entries + offset;
+
+		if ((uintptr_t)ipt_entry % __alignof__(struct bpfilter_ipt_entry)) {
+			BFLOG_ERR("invalid alignment for struct ipt_entry");
+			return -EINVAL;
+		}
+
+		if (table->size < offset + ipt_entry->next_offset) {
+			BFLOG_ERR("invalid table size: %d", table->size);
+			return -EINVAL;
+		}
+
+		r = init_rule(ctx, ipt_entry, &table->rules[i]);
+		if (r) {
+			BFLOG_ERR("failed to initialize rule: %s",
+				  STRERR(r));
+			return r;
+		}
+
+		table->rules[i].ipt_entry = ipt_entry;
+		offset += ipt_entry->next_offset;
+	}
+
+	if (offset != ipt_replace->size) {
+		BFLOG_ERR("invalid final offset: %d", offset);
+		return -EINVAL;
+	}
+
+	if (table->num_rules != ipt_replace->num_entries) {
+		BFLOG_ERR("mismatch in number of rules: got %d, expected %d",
+			  table->num_rules, ipt_replace->num_entries);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int table_check_hooks(const struct table *table)
+{
+	uint32_t max_rule_front, max_rule_last;
+	bool check = false;
+
+	for (int i = 0; i < BPFILTER_INET_HOOK_MAX; ++i) {
+		if (!table_has_hook(table, i))
+			continue;
+
+		if (check) {
+			if (table->hook_entry[i] <= max_rule_front) {
+				BFLOG_ERR("invalid hook entry");
+				return -EINVAL;
+			}
+
+			if (table->underflow[i] <= max_rule_last) {
+				BFLOG_ERR("invalid underflow entry");
+				return -EINVAL;
+			}
+		}
+
+		max_rule_front = table->hook_entry[i];
+		max_rule_last = table->underflow[i];
+		check = true;
+	}
+
+	return 0;
+}
+
+static int table_init_hooks(struct table *table,
+			    const struct bpfilter_ipt_replace *ipt_replace)
+{
+	for (int i = 0; i < BPFILTER_INET_HOOK_MAX; ++i) {
+		struct rule *rule_front;
+		struct rule *rule_last;
+		int verdict;
+
+		if (!table_has_hook(table, i))
+			continue;
+
+		rule_front = table_find_rule_by_offset(table, ipt_replace->hook_entry[i]);
+		rule_last = table_find_rule_by_offset(table, ipt_replace->underflow[i]);
+
+		if (!rule_front || !rule_last) {
+			BFLOG_ERR("expected a first and last rule");
+			return -EINVAL;
+		}
+
+		if (!rule_is_unconditional(rule_last)) {
+			BFLOG_ERR("expected unconditional rule");
+			return -EINVAL;
+		}
+
+		if (!rule_has_standard_target(rule_last)) {
+			BFLOG_ERR("expected rule for a standard target");
+			return -EINVAL;
+		}
+
+		verdict = standard_target_verdict(rule_last->target.ipt_target);
+		if (verdict >= 0) {
+			BFLOG_ERR("expected a valid standard target verdict: %d",
+				  verdict);
+			return -EINVAL;
+		}
+
+		verdict = convert_verdict(verdict);
+
+		if (verdict != BPFILTER_NF_DROP && verdict != BPFILTER_NF_ACCEPT) {
+			BFLOG_ERR("verdict must be either NF_DROP or NF_ACCEPT");
+			return -EINVAL;
+		}
+
+		table->hook_entry[i] = rule_front - table->rules;
+		table->underflow[i] = rule_last - table->rules;
+	}
+
+	return table_check_hooks(table);
+}
+
+static struct rule *next_rule(const struct table *table, struct rule *rule)
+{
+	const uint32_t i = rule - table->rules;
+
+	if (table->num_rules <= i + 1) {
+		BFLOG_ERR("rule index is out of range");
+		return ERR_PTR(-EINVAL);
+	}
+
+	++rule;
+	rule->came_from = i;
+
+	return rule;
+}
+
+static struct rule *backtrack_rule(const struct table *table, struct rule *rule)
+{
+	uint32_t i = rule - table->rules;
+	int prev_i;
+
+	do {
+		rule->hook_mask ^= (1 << BPFILTER_INET_HOOK_MAX);
+		prev_i = i;
+		i = rule->came_from;
+		rule->came_from = 0;
+
+		if (i == prev_i)
+			return NULL;
+
+		rule = &table->rules[i];
+	} while (prev_i == i + 1);
+
+	return next_rule(table, rule);
+}
+
+static int table_check_chain(struct table *table, uint32_t hook,
+			     struct rule *rule)
+{
+	uint32_t i = rule - table->rules;
+
+	rule->came_from = i;
+
+	for (;;) {
+		bool visited;
+		int verdict;
+
+		if (!rule)
+			return 0;
+
+		if (IS_ERR(rule))
+			return PTR_ERR(rule);
+
+		i = rule - table->rules;
+
+		if (table->num_rules <= i) {
+			BFLOG_ERR("rule index is out of range: %d", i);
+			return -EINVAL;
+		}
+
+		if (rule->hook_mask & (1 << BPFILTER_INET_HOOK_MAX)) {
+			BFLOG_ERR("hook index out of range");
+			return -EINVAL;
+		}
+
+		// already visited
+		visited = rule->hook_mask & (1 << hook);
+		rule->hook_mask |= (1 << hook) | (1 << BPFILTER_INET_HOOK_MAX);
+
+		if (visited) {
+			rule = backtrack_rule(table, rule);
+			continue;
+		}
+
+		if (!rule_has_standard_target(rule)) {
+			rule = next_rule(table, rule);
+			continue;
+		}
+
+		verdict = standard_target_verdict(rule->target.ipt_target);
+		if (verdict > 0) {
+			rule = table_find_rule_by_offset(table, verdict);
+			if (!rule) {
+				BFLOG_ERR("failed to find rule by offset");
+				return -EINVAL;
+			}
+
+			rule->came_from = i;
+			continue;
+		}
+
+		if (!rule_is_unconditional(rule)) {
+			rule = next_rule(table, rule);
+			continue;
+		}
+
+		rule = backtrack_rule(table, rule);
+	}
+
+	return 0;
+}
+
+static int table_check_chains(struct table *table)
+{
+	int r = 0;
+
+	for (int i = 0, r = 0; !r && i < BPFILTER_INET_HOOK_MAX; ++i) {
+		if (table_has_hook(table, i))
+			r = table_check_chain(table, i, &table->rules[table->hook_entry[i]]);
+	}
+
+	return r;
+}
+
+struct table *create_table(struct context *ctx,
+			   const struct bpfilter_ipt_replace *ipt_replace)
+{
+	struct table *table;
+	int r;
+
+	table = calloc(1, sizeof(*table));
+	if (!table) {
+		BFLOG_ERR("out of memory");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	INIT_LIST_HEAD(&table->list);
+	table->valid_hooks = ipt_replace->valid_hooks;
+	table->num_rules = ipt_replace->num_entries;
+	table->num_counters = ipt_replace->num_counters;
+	table->size = ipt_replace->size;
+
+	r = table_init_rules(ctx, table, ipt_replace);
+	if (r) {
+		BFLOG_ERR("failed to initialise table rules: %s", STRERR(r));
+		goto err_free;
+	}
+
+	r = table_init_hooks(table, ipt_replace);
+	if (r) {
+		BFLOG_ERR("failed to initialise table hooks: %s", STRERR(r));
+		goto err_free;
+	}
+
+	r = table_check_chains(table);
+	if (r) {
+		BFLOG_ERR("failed to check table chains: %s", STRERR(r));
+		goto err_free;
+	}
+
+	return table;
+
+err_free:
+	free_table(table);
+
+	return ERR_PTR(r);
+}
+
+struct rule *table_find_rule_by_offset(const struct table *table,
+				       uint32_t offset)
+{
+	const struct bpfilter_ipt_entry *key;
+
+	key = table->entries + offset;
+
+	return bsearch(key, table->rules, table->num_rules,
+		       sizeof(table->rules[0]), rule_offset_comparator);
+}
+
+void table_get_info(const struct table *table,
+		    struct bpfilter_ipt_get_info *info)
+{
+	snprintf(info->name, sizeof(info->name), "%s", table->table_ops->name);
+	info->valid_hooks = table->valid_hooks;
+
+	for (int i = 0; i < BPFILTER_INET_HOOK_MAX; ++i) {
+		const struct rule *rule_front, *rule_last;
+
+		if (!table_has_hook(table, i)) {
+			info->hook_entry[i] = 0;
+			info->underflow[i] = 0;
+			continue;
+		}
+
+		rule_front = &table->rules[table->hook_entry[i]];
+		rule_last = &table->rules[table->underflow[i]];
+		info->hook_entry[i] = (const void *)rule_front->ipt_entry - table->entries;
+		info->underflow[i] = (const void *)rule_last->ipt_entry - table->entries;
+	}
+
+	info->num_entries = table->num_rules;
+	info->size = table->size;
+}
+
+void free_table(struct table *table)
+{
+	if (!table)
+		return;
+
+	list_del(&table->list);
+
+	if (table->rules) {
+		for (int i = 0; i < table->num_rules; ++i)
+			free_rule(&table->rules[i]);
+		free(table->rules);
+	}
+
+	free(table->entries);
+	free(table);
+}
diff --git a/net/bpfilter/table.h b/net/bpfilter/table.h
new file mode 100644
index 000000000000..d683005e1755
--- /dev/null
+++ b/net/bpfilter/table.h
@@ -0,0 +1,59 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 Telegram FZ-LLC
+ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
+ */
+
+#ifndef NET_BPFILTER_TABLE_H
+#define NET_BPFILTER_TABLE_H
+
+#include "../../include/uapi/linux/bpfilter.h"
+
+#include <linux/types.h>
+
+#include <search.h>
+#include <stdint.h>
+
+struct context;
+struct rule;
+struct table;
+
+struct table_ops {
+	char name[BPFILTER_XT_TABLE_MAXNAMELEN];
+	struct table *(*create)(struct context *ctx,
+				const struct bpfilter_ipt_replace *ipt_replace);
+	int (*codegen)(struct context *ctx, struct table *table);
+	int (*install)(struct context *ctx, struct table *table);
+	void (*uninstall)(struct context *ctx, struct table *table);
+	void (*free)(struct table *table);
+	void (*update_counters)(struct table *table);
+};
+
+struct table {
+	const struct table_ops *table_ops;
+	uint32_t valid_hooks;
+	uint32_t num_rules;
+	uint32_t num_counters;
+	uint32_t size;
+	uint32_t hook_entry[BPFILTER_INET_HOOK_MAX];
+	uint32_t underflow[BPFILTER_INET_HOOK_MAX];
+	struct rule *rules;
+	void *entries;
+	void *ctx;
+	struct list_head list;
+};
+
+struct table_index {
+	struct hsearch_data map;
+	struct list_head list;
+};
+
+struct table *create_table(struct context *ctx,
+			   const struct bpfilter_ipt_replace *ipt_replace);
+struct rule *table_find_rule_by_offset(const struct table *table,
+				       uint32_t offset);
+void table_get_info(const struct table *table,
+		    struct bpfilter_ipt_get_info *info);
+void free_table(struct table *table);
+
+#endif // NET_BPFILTER_TABLE_H
diff --git a/tools/testing/selftests/bpf/bpfilter/Makefile b/tools/testing/selftests/bpf/bpfilter/Makefile
index 4ef52bfe2d21..53634699d427 100644
--- a/tools/testing/selftests/bpf/bpfilter/Makefile
+++ b/tools/testing/selftests/bpf/bpfilter/Makefile
@@ -45,7 +45,7 @@  BPFILTER_RULE_SRCS := $(BPFILTERSRCDIR)/rule.c
 BPFILTER_COMMON_SRCS := $(BPFILTER_MAP_SRCS) $(BPFILTER_CODEGEN_SRCS)
 BPFILTER_COMMON_SRCS += $(BPFILTERSRCDIR)/context.c $(BPFILTERSRCDIR)/logger.c
 BPFILTER_COMMON_SRCS += $(BPFILTER_MATCH_SRCS) $(BPFILTER_TARGET_SRCS)
-BPFILTER_COMMON_SRCS += $(BPFILTER_RULE_SRCS)
+BPFILTER_COMMON_SRCS += $(BPFILTER_RULE_SRCS) $(BPFILTERSRCDIR)/table.c
 
 $(OUTPUT)/test_map: test_map.c $(BPFILTER_MAP_SRCS)
 $(OUTPUT)/test_match: test_match.c $(BPFILTER_COMMON_SRCS)