@@ -87,4 +87,9 @@ struct dentry;
*/
extern int bpf_get_dentry_xattr(struct dentry *dentry, const char *name,
struct bpf_dynptr *value_ptr) __ksym __weak;
+
+struct fanotify_fastpath_event;
+extern struct inode *bpf_fanotify_data_inode(struct fanotify_fastpath_event *event) __ksym __weak;
+extern void bpf_iput(struct inode *inode) __ksym __weak;
+/* NOTE(review): progs/fan_fp.c calls the dentry-based kfuncs below
+ * (bpf_fanotify_data_dentry/bpf_dput), so declare them here as well;
+ * otherwise the prog uses undeclared kfuncs.
+ */
+extern struct dentry *bpf_fanotify_data_dentry(struct fanotify_fastpath_event *event) __ksym __weak;
+extern void bpf_dput(struct dentry *dentry) __ksym __weak;
+extern bool bpf_is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) __ksym __weak;
#endif
@@ -24,6 +24,8 @@ CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DUMMY=y
CONFIG_DYNAMIC_FTRACE=y
+CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_FASTPATH=y
CONFIG_FPROBE=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FUNCTION_ERROR_INJECTION=y
new file mode 100644
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/fanotify.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+
+#include <test_progs.h>
+
+#include "fan_fp.skel.h"
+
+#define TEST_FS "/tmp/"
+#define TEST_DIR "/tmp/fanotify_test/"
+
+/* Create TEST_DIR (tolerating an already-existing directory) and return
+ * an O_RDONLY fd to it, or a negative value on failure.
+ */
+static int create_test_subtree(void)
+{
+	int err;
+
+	err = mkdir(TEST_DIR, 0777);
+	if (err && errno != EEXIST)
+		return err;
+
+	return open(TEST_DIR, O_RDONLY);
+}
+
+/* Initialize a fanotify group (with directory-fid + name reporting) and
+ * mark the whole TEST_FS filesystem for FAN_CREATE/FAN_OPEN events,
+ * including directories and children.
+ * Returns the fanotify fd on success, -1 on failure.
+ */
+static int create_fanotify_fd(void)
+{
+	int fanotify_fd, err;
+
+	fanotify_fd = fanotify_init(FAN_CLASS_NOTIF | FAN_REPORT_NAME | FAN_REPORT_DIR_FID,
+				    O_RDONLY);
+
+	if (!ASSERT_OK_FD(fanotify_fd, "fanotify_init"))
+		return -1;
+
+	err = fanotify_mark(fanotify_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
+			    FAN_CREATE | FAN_OPEN | FAN_ONDIR | FAN_EVENT_ON_CHILD,
+			    AT_FDCWD, TEST_FS);
+	if (!ASSERT_OK(err, "fanotify_mark")) {
+		close(fanotify_fd);
+		return -1;
+	}
+
+	return fanotify_fd;
+}
+
+/* Attach the BPF fastpath registered under the name "_tmp_test_sub_tree"
+ * (by the struct_ops map in progs/fan_fp.c) to the fanotify group via
+ * the FAN_IOC_ADD_FP ioctl.
+ * Returns 0 on success, -1 on failure.
+ */
+static int attach_global_fastpath(int fanotify_fd)
+{
+	struct fanotify_fastpath_args args = {
+		.name = "_tmp_test_sub_tree",
+		.version = 1,
+		.flags = 0,
+	};
+
+	if (ioctl(fanotify_fd, FAN_IOC_ADD_FP, &args))
+		return -1;
+
+	return 0;
+}
+
+#define EVENT_BUFFER_SIZE 4096
+
+/* Expected file names and whether an event for each was observed. */
+struct file_access_result {
+	char name_prefix[16];
+	bool accessed;
+} access_results[3] = {
+	{"aa", false},
+	{"bb", false},
+	{"cc", false},
+};
+
+/* Mark the access_results entry matching @name as accessed.
+ * NOTE(review): despite the field name "name_prefix", this is an exact
+ * strcmp() match, not a prefix match — confirm the intent.
+ */
+static void update_access_results(char *name)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(access_results); i++) {
+		if (strcmp(name, access_results[i].name_prefix) == 0)
+			access_results[i].accessed = true;
+	}
+}
+
+/* Walk the fanotify event buffer and, for each DFID_NAME info record,
+ * extract the file name that follows the file handle and record it in
+ * access_results.
+ */
+static void parse_event(char *buffer, int len)
+{
+	struct fanotify_event_metadata *event =
+		(struct fanotify_event_metadata *) buffer;
+	struct fanotify_event_info_header *info;
+	struct fanotify_event_info_fid *fid;
+	struct file_handle *handle;
+	char *name;
+	int off;
+
+	for (; FAN_EVENT_OK(event, len); event = FAN_EVENT_NEXT(event, len)) {
+		/* Each event may carry multiple info records after the
+		 * fixed-size metadata header.
+		 */
+		for (off = sizeof(*event) ; off < event->event_len;
+		     off += info->len) {
+			info = (struct fanotify_event_info_header *)
+				((char *) event + off);
+			switch (info->info_type) {
+			case FAN_EVENT_INFO_TYPE_DFID_NAME:
+				fid = (struct fanotify_event_info_fid *) info;
+				handle = (struct file_handle *)&fid->handle;
+				/* The NUL-terminated name is laid out
+				 * immediately after the variable-length
+				 * file handle.
+				 */
+				name = (char *)handle + sizeof(*handle) + handle->handle_bytes;
+				update_access_results(name);
+				break;
+			default:
+				break;
+			}
+		}
+	}
+}
+
+/* Create and immediately remove @path, generating FAN_CREATE and
+ * FAN_OPEN events for it. unlink() is called even when the open fails,
+ * which is harmless for a nonexistent file.
+ */
+static void touch_file(const char *path)
+{
+	int fd;
+
+	fd = open(path, O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK, 0666);
+	if (!ASSERT_OK_FD(fd, "open"))
+		goto cleanup;
+	close(fd);
+cleanup:
+	unlink(path);
+}
+
+/* Generate file accesses under TEST_DIR and TEST_FS, then verify that
+ * the BPF fastpath forwards events inside the tagged subtree (aa, bb)
+ * while filtering out events outside it (cc), and that the inode-storage
+ * cache is hit the expected number of times.
+ */
+static void generate_and_test_event(int fanotify_fd, struct fan_fp *skel)
+{
+	char buffer[EVENT_BUFFER_SIZE];
+	int len, err, fd;
+
+	/* Open the dir, so initialize_subdir_root can work */
+	fd = open(TEST_DIR, O_RDONLY);
+	/* Fail early instead of calling close(-1) and continuing with
+	 * confusing downstream failures.
+	 */
+	if (!ASSERT_OK_FD(fd, "open TEST_DIR"))
+		goto cleanup;
+	close(fd);
+
+	if (!ASSERT_EQ(skel->bss->initialized, true, "initialized"))
+		goto cleanup;
+
+	/* access /tmp/fanotify_test/aa, this will generate event */
+	touch_file(TEST_DIR "aa");
+
+	/* create /tmp/fanotify_test/subdir, this will get tag from the
+	 * parent directory (added in the bpf program on fsnotify_mkdir)
+	 */
+	err = mkdir(TEST_DIR "subdir", 0777);
+	ASSERT_OK(err, "mkdir");
+
+	/* access /tmp/fanotify_test/subdir/bb, this will generate event */
+	touch_file(TEST_DIR "subdir/bb");
+
+	/* access /tmp/cc, this will NOT generate event, as the BPF
+	 * fastpath filtered this event out. (Because /tmp doesn't have
+	 * the tag.)
+	 */
+	touch_file(TEST_FS "cc");
+
+	/* read and parse the events */
+	len = read(fanotify_fd, buffer, EVENT_BUFFER_SIZE);
+	if (!ASSERT_GE(len, 0, "read event"))
+		goto cleanup;
+	parse_event(buffer, len);
+
+	/* verify we generated events for aa and bb, but filtered out the
+	 * event for cc.
+	 */
+	ASSERT_TRUE(access_results[0].accessed, "access aa");
+	ASSERT_TRUE(access_results[1].accessed, "access bb");
+	ASSERT_FALSE(access_results[2].accessed, "access cc");
+
+	/* Each touch_file() generates two events: FAN_CREATE then
+	 * FAN_OPEN. The second event will hit cache.
+	 * open(TEST_DIR) also hits the cache, as we updated the cache for
+	 * TEST_DIR from userspace.
+	 * Therefore, we expect 4 cache hits: aa, bb, cc, and TEST_DIR.
+	 */
+	ASSERT_EQ(skel->bss->cache_hit, 4, "cache_hit");
+
+cleanup:
+	rmdir(TEST_DIR "subdir");
+	rmdir(TEST_DIR);
+}
+
+/* This test shows a simplified logic that monitors a subtree. This is
+ * simplified as it doesn't handle all the scenarios, such as:
+ *
+ * 1) moving a sub-subtree into/out of the monitored subtree;
+ * 2) a mount point inside the monitored subtree
+ *
+ * Therefore, this is not to show a way to reliably monitor a subtree.
+ * Instead, this is to test the functionalities of bpf based fastpath.
+ *
+ * Overview of the logic:
+ * 1. fanotify is created for the whole file system (/tmp);
+ * 2. A bpf map (inode_storage_map) is used to tag directories to
+ * monitor (starting from /tmp/fanotify_test);
+ * 3. On fsnotify_mkdir, the tag is propagated to newly created sub
+ * directories (/tmp/fanotify_test/subdir);
+ * 4. The bpf fastpath checks whether the event happens in a directory
+ * with the tag. If yes, the event is sent to user space; otherwise,
+ * the event is dropped.
+ */
+static void test_monitor_subtree(void)
+{
+	struct bpf_link *link;
+	struct fan_fp *skel;
+	int test_root_fd;
+	int zero = 0;
+	int err, fanotify_fd;
+	struct stat st;
+
+	test_root_fd = create_test_subtree();
+
+	if (!ASSERT_OK_FD(test_root_fd, "create_test_subtree"))
+		return;
+
+	/* The BPF program identifies the subtree root by inode number. */
+	err = fstat(test_root_fd, &st);
+	if (!ASSERT_OK(err, "fstat test_root_fd"))
+		goto close_test_root_fd;
+
+	skel = fan_fp__open_and_load();
+
+	if (!ASSERT_OK_PTR(skel, "fan_fp__open_and_load"))
+		goto close_test_root_fd;
+
+	skel->bss->root_ino = st.st_ino;
+
+	/* Add tag to /tmp/fanotify_test/ */
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.inode_storage_map),
+				  &test_root_fd, &zero, BPF_ANY);
+	if (!ASSERT_OK(err, "bpf_map_update_elem"))
+		goto destroy_skel;
+	link = bpf_map__attach_struct_ops(skel->maps.bpf_fanotify_fastpath_ops);
+	if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops"))
+		goto destroy_skel;
+
+	fanotify_fd = create_fanotify_fd();
+	if (!ASSERT_OK_FD(fanotify_fd, "create_fanotify_fd"))
+		goto destroy_link;
+
+	err = attach_global_fastpath(fanotify_fd);
+	if (!ASSERT_OK(err, "attach_global_fastpath"))
+		goto close_fanotify_fd;
+
+	generate_and_test_event(fanotify_fd, skel);
+
+	/* goto-based cleanup: release resources in reverse order of
+	 * acquisition.
+	 */
+close_fanotify_fd:
+	close(fanotify_fd);
+
+destroy_link:
+	bpf_link__destroy(link);
+destroy_skel:
+	fan_fp__destroy(skel);
+
+close_test_root_fd:
+	close(test_root_fd);
+	rmdir(TEST_DIR);
+}
+
+/* Selftest entry point; runs the "subtree" subtest. */
+void test_bpf_fanotify_fastpath(void)
+{
+	if (test__start_subtest("subtree"))
+		test_monitor_subtree();
+}
new file mode 100644
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+/* Wrapper struct so a referenced dentry kptr can live in a map value. */
+struct __dentry_kptr_value {
+	struct dentry __kptr * dentry;
+};
+
+/* subdir_root map holds a single dentry pointer to the subtree root.
+ * This pointer is used to call bpf_is_subdir().
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, struct __dentry_kptr_value);
+	__uint(max_entries, 1);
+} subdir_root SEC(".maps");
+
+/* inode_storage_map serves as cache for bpf_is_subdir(). inode local
+ * storage has O(1) access time. So this is preferred over calling
+ * bpf_is_subdir().
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_INODE_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} inode_storage_map SEC(".maps");
+
+/* Inode number of the subtree root; set from user space before events fire. */
+unsigned long root_ino;
+/* True once subdir_root holds the root dentry. */
+bool initialized;
+
+/* This function initializes the subdir_root map. The logic is a bit ugly.
+ * First, user space sets root_ino. Then a fanotify event is triggered.
+ * If the event dentry matches root_ino, we take a reference on the
+ * dentry and save it in subdir_root map. The reference will be freed on
+ * the termination of subdir_root map.
+ */
+static void initialize_subdir_root(struct fanotify_fastpath_event *fp_event)
+{
+	struct __dentry_kptr_value *v;
+	struct dentry *dentry, *old;
+	int zero = 0;
+
+	if (initialized)
+		return;
+
+	/* Acquires a dentry reference; every path below either transfers
+	 * it into the map or releases it with bpf_dput().
+	 */
+	dentry = bpf_fanotify_data_dentry(fp_event);
+	if (!dentry)
+		return;
+
+	if (dentry->d_inode->i_ino != root_ino) {
+		bpf_dput(dentry);
+		return;
+	}
+
+	v = bpf_map_lookup_elem(&subdir_root, &zero);
+	if (!v) {
+		bpf_dput(dentry);
+		return;
+	}
+
+	/* Transfer the reference into the map; drop any previous kptr. */
+	old = bpf_kptr_xchg(&v->dentry, dentry);
+	if (old)
+		bpf_dput(old);
+	initialized = true;
+}
+
+/* Number of events answered from the inode-storage cache; read by the
+ * user space test.
+ */
+int cache_hit;
+
+/* bpf_fp_handler is sleepable, as it calls bpf_dput() */
+SEC("struct_ops.s")
+int BPF_PROG(bpf_fp_handler,
+	     struct fsnotify_group *group,
+	     struct fanotify_fastpath_hook *fp_hook,
+	     struct fanotify_fastpath_event *fp_event)
+{
+	struct __dentry_kptr_value *v;
+	struct dentry *dentry;
+	int zero = 0;
+	int *value;
+	int ret;
+
+	initialize_subdir_root(fp_event);
+
+	/* Before the subdir_root map is initialized, send all events to
+	 * user space.
+	 */
+	if (!initialized)
+		return FAN_FP_RET_SEND_TO_USERSPACE;
+
+	/* Takes a dentry reference; released via bpf_dput() below. */
+	dentry = bpf_fanotify_data_dentry(fp_event);
+	if (!dentry)
+		return FAN_FP_RET_SEND_TO_USERSPACE;
+
+	/* If inode_storage_map has cached value, just return it */
+	value = bpf_inode_storage_get(&inode_storage_map, dentry->d_inode, 0, 0);
+	if (value) {
+		bpf_dput(dentry);
+		cache_hit++;
+		return *value;
+	}
+
+	/* Hold rcu read lock for bpf_is_subdir */
+	bpf_rcu_read_lock();
+	v = bpf_map_lookup_elem(&subdir_root, &zero);
+	if (!v || !v->dentry) {
+		/* This shouldn't happen, but we need this to pass
+		 * the verifier.
+		 */
+		ret = FAN_FP_RET_SEND_TO_USERSPACE;
+		goto out;
+	}
+
+	if (bpf_is_subdir(dentry, v->dentry))
+		ret = FAN_FP_RET_SEND_TO_USERSPACE;
+	else
+		ret = FAN_FP_RET_SKIP_EVENT;
+out:
+	bpf_rcu_read_unlock();
+
+	/* Save current result to the inode_storage_map */
+	value = bpf_inode_storage_get(&inode_storage_map, dentry->d_inode, 0,
+				      BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (value)
+		*value = ret;
+	bpf_dput(dentry);
+	return ret;
+}
+
+/* No per-hook setup needed for this test. */
+SEC("struct_ops")
+int BPF_PROG(bpf_fp_init, struct fanotify_fastpath_hook *hook, const char *args)
+{
+	return 0;
+}
+
+/* Nothing to release here; the subdir_root kptr is freed with the map. */
+SEC("struct_ops")
+void BPF_PROG(bpf_fp_free, struct fanotify_fastpath_hook *hook)
+{
+}
+
+/* Register the fastpath ops under the name that user space passes to
+ * the FAN_IOC_ADD_FP ioctl ("_tmp_test_sub_tree").
+ */
+SEC(".struct_ops.link")
+struct fanotify_fastpath_ops bpf_fanotify_fastpath_ops = {
+	.fp_handler = (void *)bpf_fp_handler,
+	.fp_init = (void *)bpf_fp_init,
+	.fp_free = (void *)bpf_fp_free,
+	.name = "_tmp_test_sub_tree",
+};
+
+char _license[] SEC("license") = "GPL";
This test shows a simplified logic that monitors a subtree. It is simplified in that it doesn't handle all the scenarios, such as: 1) moving a sub-subtree into/out of the monitored subtree; 2) a mount point inside the monitored subtree. Therefore, this is not meant to show a way to reliably monitor a subtree. Instead, it is meant to test the functionality of the BPF based fastpath. To monitor a subtree reliably, we would need more complex logic. Overview of the logic: 1. fanotify is created for the whole file system (/tmp). 2. The dentry of the subtree root is saved in map subdir_root. 3. bpf_is_subdir() is used to check whether a fanotify event happens inside the subtree. Only events that happened in the subtree are passed to userspace. 4. A bpf map (inode_storage_map) is used to cache results from bpf_is_subdir(). 5. Moving a sub-subtree is not handled, because we don't yet have a good way to walk a subtree from BPF (something similar to d_walk). Signed-off-by: Song Liu <song@kernel.org> --- tools/testing/selftests/bpf/bpf_kfuncs.h | 5 + tools/testing/selftests/bpf/config | 2 + .../testing/selftests/bpf/prog_tests/fan_fp.c | 264 ++++++++++++++++++ tools/testing/selftests/bpf/progs/fan_fp.c | 154 ++++++++++ 4 files changed, 425 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/fan_fp.c create mode 100644 tools/testing/selftests/bpf/progs/fan_fp.c