diff mbox series

[RFC,5/5] bpf: Sample BPF program to set oom policy

Message ID 20230727073632.44983-6-zhouchuyi@bytedance.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series mm: Select victim memcg using BPF_OOM_POLICY | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch, async
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-4 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-5 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-6 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-3 success Logs for build for s390x with gcc
bpf/vmtest-bpf-VM_Test-7 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-9 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-10 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-11 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-13 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-14 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-15 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-18 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-19 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-21 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-22 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-24 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-25 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-26 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-28 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-29 success Logs for veristat
bpf/vmtest-bpf-VM_Test-16 fail Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-12 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-8 success Logs for test_maps on s390x with gcc

Commit Message

Chuyi Zhou July 27, 2023, 7:36 a.m. UTC
This patch adds a sample showing how to set an OOM victim selection policy
that protects certain cgroups.

The BPF program, oom_kern.c, compares the scores of two sibling memcgs and
selects the one with the larger score. The userspace program, oom_user.c,
maintains a score map that uses cgroup inode numbers as keys and scores as
values. Users can give a cgroup a lower score than its siblings to keep it
from being selected.

Suggested-by: Abel Wu <wuyun.abel@bytedance.com>
Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
---
 samples/bpf/Makefile   |   3 +
 samples/bpf/oom_kern.c |  42 ++++++++++++++
 samples/bpf/oom_user.c | 128 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 173 insertions(+)
 create mode 100644 samples/bpf/oom_kern.c
 create mode 100644 samples/bpf/oom_user.c
diff mbox series

Patch

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 615f24ebc49c..09dbdec22dad 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -56,6 +56,7 @@  tprogs-y += xdp_redirect_map_multi
 tprogs-y += xdp_redirect_map
 tprogs-y += xdp_redirect
 tprogs-y += xdp_monitor
+tprogs-y += oom
 
 # Libbpf dependencies
 LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf
@@ -118,6 +119,7 @@  xdp_redirect_map-objs := xdp_redirect_map_user.o $(XDP_SAMPLE)
 xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE)
 xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE)
 xdp_router_ipv4-objs := xdp_router_ipv4_user.o $(XDP_SAMPLE)
+oom-objs := oom_user.o
 
 # Tell kbuild to always build the programs
 always-y := $(tprogs-y)
@@ -173,6 +175,7 @@  always-y += xdp_sample_pkts_kern.o
 always-y += ibumad_kern.o
 always-y += hbm_out_kern.o
 always-y += hbm_edt_kern.o
+always-y += oom_kern.o
 
 ifeq ($(ARCH), arm)
 # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux
diff --git a/samples/bpf/oom_kern.c b/samples/bpf/oom_kern.c
new file mode 100644
index 000000000000..1e0e2de1e06e
--- /dev/null
+++ b/samples/bpf/oom_kern.c
@@ -0,0 +1,42 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include <bpf/bpf_helpers.h>
+
+struct {	/* per-cgroup score table consulted by bpf_prog1() */
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1024);
+	__type(key, u64);	/* looked up with the value of bpf_get_ino_from_cgroup_id() */
+	__type(value, u32);	/* score; bpf_prog1() uses 250 when no entry exists */
+} sc_map SEC(".maps");
+
+/*
+ * Compare the two cgroups named in @ctx by their sc_map scores and store
+ * the outcome in ctx->cmp_ret. A cgroup without a map entry scores 250.
+ * Always returns 0.
+ */
+SEC("oom_policy")
+int bpf_prog1(struct bpf_oom_ctx *ctx)
+{
+	u32 score_1 = 250, score_2 = 250;
+	u64 ino_1 = bpf_get_ino_from_cgroup_id(ctx->cg_id_1);
+	u64 ino_2 = bpf_get_ino_from_cgroup_id(ctx->cg_id_2);
+	u32 *entry;
+
+	/* Replace each default with the configured score, if there is one. */
+	entry = bpf_map_lookup_elem(&sc_map, &ino_1);
+	if (entry)
+		score_1 = *entry;
+
+	entry = bpf_map_lookup_elem(&sc_map, &ino_2);
+	if (entry)
+		score_2 = *entry;
+
+	if (score_1 == score_2)
+		ctx->cmp_ret = BPF_OOM_CMP_EQUAL;
+	else if (score_1 > score_2)
+		ctx->cmp_ret = BPF_OOM_CMP_GREATER;
+	else
+		ctx->cmp_ret = BPF_OOM_CMP_LESS;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/oom_user.c b/samples/bpf/oom_user.c
new file mode 100644
index 000000000000..7bd2d56ba910
--- /dev/null
+++ b/samples/bpf/oom_user.c
@@ -0,0 +1,128 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "trace_helpers.h"
+
+static int map_fd, prog_fd;	/* fds of sc_map and of the BPF program; both set in main() */
+
+/*
+ * Return the inode number of the file or directory at @path, or 0 if it
+ * cannot be opened or stat'ed.
+ */
+static unsigned long long get_cgroup_inode(const char *path)
+{
+	unsigned long long inode = 0;
+	struct stat file_stat;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return 0;
+
+	/* On fstat() failure inode stays 0; fd is closed on every path. */
+	if (fstat(fd, &file_stat) == 0)
+		inode = file_stat.st_ino;
+
+	close(fd);
+	return inode;
+}
+
+/*
+ * Store @score in the score map under the inode number of @cg_path.
+ * Returns 0 on success, 1 if the inode lookup or the map update fails.
+ */
+static int set_cgroup_oom_score(const char *cg_path, int score)
+{
+	unsigned long long key;
+	int err;
+
+	key = get_cgroup_inode(cg_path);
+	if (key == 0) {
+		fprintf(stderr, "ERROR: get inode for %s failed\n", cg_path);
+		return 1;
+	}
+
+	err = bpf_map_update_elem(map_fd, &key, &score, BPF_ANY);
+	if (err != 0) {
+		fprintf(stderr, "ERROR: update map failed\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * A simple sample that prefers /root/blue/instance_1 as the victim memcg
+ * and protects /root/blue/instance_2
+ *           root
+ *       /         \
+ *     user ...    blue
+ *     /  \        /     \
+ *     ..     instance_1  instance_2
+ */
+
+/*
+ * Load oom_kern.o, fill sc_map with the sample scores, attach the program
+ * as BPF_OOM_POLICY and print the number of attached programs.
+ *
+ * Return: 0 on success, 1 if any step fails.
+ */
+int main(int argc, char **argv)
+{
+	struct bpf_object *obj = NULL;
+	struct bpf_program *prog;
+	/* NOTE(review): 0 is passed as the attach target; confirm it is unused */
+	int target_fd = 0;
+	unsigned int prog_cnt;
+	int ret = 1;
+
+	obj = bpf_object__open_file("oom_kern.o", NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		/* don't hand the failed handle to bpf_object__close() */
+		obj = NULL;
+		goto cleanup;
+	}
+
+	prog = bpf_object__next_program(obj, NULL);
+	if (!prog) {
+		fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+		goto cleanup;
+	}
+	bpf_program__set_type(prog, BPF_PROG_TYPE_OOM_POLICY);
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	map_fd = bpf_object__find_map_fd_by_name(obj, "sc_map");
+	if (map_fd < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
+	}
+
+	/*
+	 * In this sample, the default score is 250 (see oom_kern.c).
+	 * Set a high score for /blue and /blue/instance_1, so that when a
+	 * global OOM happens, /blue/instance_1 is chosen as the victim memcg.
+	 */
+	if (set_cgroup_oom_score("/sys/fs/cgroup/blue/", 500)) {
+		fprintf(stderr, "ERROR: set score for /blue failed\n");
+		goto cleanup;
+	}
+	if (set_cgroup_oom_score("/sys/fs/cgroup/blue/instance_1", 500)) {
+		fprintf(stderr, "ERROR: set score for /blue/instance_1 failed\n");
+		goto cleanup;
+	}
+
+	/* set low score to protect /blue/instance_2 */
+	if (set_cgroup_oom_score("/sys/fs/cgroup/blue/instance_2", 100)) {
+		fprintf(stderr, "ERROR: set score for /blue/instance_2 failed\n");
+		goto cleanup;
+	}
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		fprintf(stderr, "ERROR: getting program fd failed\n");
+		goto cleanup;
+	}
+
+	/* Attach bpf program */
+	if (bpf_prog_attach(prog_fd, target_fd, BPF_OOM_POLICY, 0)) {
+		fprintf(stderr, "Failed to attach BPF_OOM_POLICY program\n");
+		goto cleanup;
+	}
+	if (bpf_prog_query(target_fd, BPF_OOM_POLICY, 0, NULL, NULL, &prog_cnt)) {
+		fprintf(stderr, "Failed to query attached programs\n");
+		goto cleanup;
+	}
+	printf("prog_cnt: %u\n", prog_cnt);
+	ret = 0;
+
+cleanup:
+	bpf_object__close(obj);
+	return ret;
+}