@@ -6,6 +6,7 @@ config XFS_FS
select LIBCRC32C
select FS_IOMAP
select TIME_STATS if XFS_TIME_STATS
+ select THREAD_WITH_FILE if XFS_HEALTH_MONITOR
help
XFS is a high performance journaling filesystem which originated
on the SGI IRIX platform. It is completely multi-threaded, can
@@ -128,6 +129,14 @@ config XFS_TIME_STATS
help
Collects time statistics on various operations in the filesystem.
+config XFS_HEALTH_MONITOR
+ bool "Report filesystem health events to userspace"
+ depends on XFS_FS
+ select XFS_LIVE_HOOKS
+ default y
+ help
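+	  Report filesystem health events to userspace programs.  A
+	  monitoring program obtains a file descriptor through an ioctl
+	  and reads the events from it.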
+
config XFS_DRAIN_INTENTS
bool
select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL
@@ -154,6 +154,7 @@ xfs-$(CONFIG_XFS_LIVE_HOOKS) += xfs_hooks.o
xfs-$(CONFIG_XFS_MEMORY_BUFS) += xfs_buf_mem.o
xfs-$(CONFIG_XFS_BTREE_IN_MEM) += libxfs/xfs_btree_mem.o
xfs-$(CONFIG_XFS_TIME_STATS) += xfs_timestats.o
+xfs-$(CONFIG_XFS_HEALTH_MONITOR) += xfs_healthmon.o
# online scrub/repair
ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
@@ -855,6 +855,7 @@ struct xfs_scrub_metadata {
#define XFS_IOC_FSGETXATTRA _IOR ('X', 45, struct fsxattr)
/* XFS_IOC_SETBIOSIZE ---- deprecated 46 */
/* XFS_IOC_GETBIOSIZE ---- deprecated 47 */
+/* XFS_IOC_HEALTH_MONITOR --- staging 48 */
#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
@@ -303,4 +303,14 @@ struct xfs_map_freesp {
*/
#define XFS_IOC_MAP_FREESP _IOWR('X', 64, struct xfs_map_freesp)
+/*
+ * Argument for XFS_IOC_HEALTH_MONITOR.  Every field must currently be zero;
+ * the kernel rejects the call with EINVAL otherwise.
+ */
+struct xfs_health_monitor {
+	__u64	flags;		/* flags; none accepted yet, must be zero */
+	__u8	format;		/* output format; only 0 is accepted */
+	__u8	pad1[7];	/* zeroes */
+	__u64	pad2[2];	/* zeroes */
+};
+
+/*
+ * Monitor for health events.  The kernel only copies the argument in from
+ * userspace and never writes it back, so the command is encoded with _IOW.
+ */
+#define XFS_IOC_HEALTH_MONITOR	_IOW ('X', 48, struct xfs_health_monitor)
+
#endif /* __XFS_FS_STAGING_H__ */
new file mode 100644
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trace.h"
+#include "xfs_health.h"
+#include "xfs_ag.h"
+#include "xfs_btree.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_quota_defs.h"
+#include "xfs_rtgroup.h"
+#include "xfs_healthmon.h"
+
+/*
+ * Live Health Monitoring
+ * ======================
+ *
+ * Autonomous self-healing of XFS filesystems requires a means for the kernel
+ * to send filesystem health events to a monitoring daemon in userspace. To
+ * accomplish this, we establish a thread_with_file kthread object to handle
+ * translating internal events about filesystem health into a format that can
+ * be parsed easily by userspace. Then we hook various parts of the filesystem
+ * to supply those internal events to the kthread. Userspace reads events
+ * from the file descriptor returned by the ioctl.
+ *
+ * The healthmon abstraction has a weak reference to the host filesystem mount
+ * so that the queueing and processing of the events do not pin the mount and
+ * cannot slow down the main filesystem. The healthmon object can exist past
+ * the end of the filesystem mount.
+ */
+
+/*
+ * State for one health monitor.  One of these is allocated by each successful
+ * xfs_healthmon_create() call and freed by xfs_healthmon_exit().
+ */
+struct xfs_healthmon {
+ /* thread with stdio redirection */
+ struct thread_with_stdio thread;
+};
+
+/* Map an embedded thread_with_stdio back to its enclosing xfs_healthmon. */
+static inline struct xfs_healthmon *
+to_healthmon(
+	struct thread_with_stdio	*thr)
+{
+	struct xfs_healthmon		*hm;
+
+	hm = container_of(thr, struct xfs_healthmon, thread);
+	return hm;
+}
+
+/*
+ * Tear down a health monitor: free the xfs_healthmon that embeds @thr, then
+ * drop a reference on this module.
+ */
+STATIC void
+xfs_healthmon_exit(
+	struct thread_with_stdio	*thr)
+{
+	kfree(to_healthmon(thr));
+	module_put(THIS_MODULE);
+}
+
+/*
+ * Pipe health monitoring information to userspace.  Installed as the .fn
+ * callback of xfs_healthmon_ops.
+ */
+STATIC void
+xfs_healthmon_run(
+ struct thread_with_stdio *thr)
+{
+ /* Empty for now: nothing is written to the output stream. */
+}
+
+/*
+ * Check the ioctl argument supplied by userspace.  Returns true only if the
+ * flags, the format, and both padding arrays are entirely zero.
+ */
+static inline bool
+xfs_healthmon_validate(
+	const struct xfs_health_monitor	*hmo)
+{
+	/* No flag bits and no non-zero output format are accepted. */
+	if (hmo->flags || hmo->format)
+		return false;
+
+	/* memchr_inv() finds the first byte that is not zero, if any. */
+	return !memchr_inv(&hmo->pad1, 0, sizeof(hmo->pad1)) &&
+	       !memchr_inv(&hmo->pad2, 0, sizeof(hmo->pad2));
+}
+
+/* Callbacks handed to run_thread_with_stdout() for a health monitor. */
+static const struct thread_with_stdio_ops xfs_healthmon_ops = {
+	.fn	= xfs_healthmon_run,
+	.exit	= xfs_healthmon_exit,
+};
+
+/*
+ * Create a health monitoring file for the caller.
+ *
+ * On success, returns the non-negative value produced by
+ * run_thread_with_stdout(), which is an index to the fd table.  Otherwise
+ * returns a negative errno: -EINVAL if @hmo fails validation, -EPERM if the
+ * caller lacks CAP_SYS_ADMIN, -ENOMEM if the module reference or the memory
+ * allocation cannot be obtained, or whatever error run_thread_with_stdout()
+ * reports.  @mp is not used yet.
+ */
+int
+xfs_healthmon_create(
+	struct xfs_mount		*mp,
+	struct xfs_health_monitor	*hmo)
+{
+	struct xfs_healthmon		*hm;
+	int				fd;
+
+	if (!xfs_healthmon_validate(hmo))
+		return -EINVAL;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* This reference is dropped again by xfs_healthmon_exit(). */
+	if (!try_module_get(THIS_MODULE))
+		return -ENOMEM;
+
+	fd = -ENOMEM;
+	hm = kzalloc(sizeof(*hm), GFP_KERNEL);
+	if (!hm)
+		goto out_mod;
+
+	fd = run_thread_with_stdout(&hm->thread, &xfs_healthmon_ops);
+	if (fd >= 0)
+		return fd;
+
+	/* Launch failed, so undo the allocation and the module reference. */
+	kfree(hm);
+out_mod:
+	module_put(THIS_MODULE);
+	return fd;
+}
new file mode 100644
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_HEALTHMON_H__
+#define __XFS_HEALTHMON_H__
+
+/*
+ * Create a health monitor for @mp as described by @hmo.  When health
+ * monitoring is configured out, the stub below expands to -EOPNOTSUPP and
+ * does not evaluate its arguments.
+ */
+#ifdef CONFIG_XFS_HEALTH_MONITOR
+int xfs_healthmon_create(struct xfs_mount *mp, struct xfs_health_monitor *hmo);
+#else
+# define xfs_healthmon_create(mp, hmo) (-EOPNOTSUPP)
+#endif /* CONFIG_XFS_HEALTH_MONITOR */
+
+#endif /* __XFS_HEALTHMON_H__ */
@@ -44,6 +44,7 @@
#include "xfs_file.h"
#include "xfs_exchrange.h"
#include "xfs_rtgroup.h"
+#include "xfs_healthmon.h"
#include <linux/mount.h>
#include <linux/namei.h>
@@ -2429,6 +2430,23 @@ xfs_ioc_map_freesp(
# define xfs_ioc_map_freesp(...) (-ENOTTY)
#endif
+#ifdef CONFIG_XFS_EXPERIMENTAL_IOCTLS
+/*
+ * Handle XFS_IOC_HEALTH_MONITOR: copy the request in from userspace and pass
+ * it to xfs_healthmon_create(), whose result is returned unchanged.  Returns
+ * -EFAULT if the request cannot be copied in.
+ */
+STATIC int
+xfs_ioc_health_monitor(
+	struct xfs_mount			*mp,
+	struct xfs_health_monitor __user	*arg)
+{
+	struct xfs_health_monitor		hmo;
+	int					ret;
+
+	if (copy_from_user(&hmo, arg, sizeof(hmo)) != 0)
+		return -EFAULT;
+
+	ret = xfs_healthmon_create(mp, &hmo);
+	return ret;
+}
+#else
+# define xfs_ioc_health_monitor(...)	(-ENOTTY)
+#endif
+
/*
* These long-unused ioctls were removed from the official ioctl API in 5.17,
* but retain these definitions so that we can log warnings about them.
@@ -2685,6 +2703,9 @@ xfs_file_ioctl(
case XFS_IOC_MAP_FREESP:
return xfs_ioc_map_freesp(filp, arg);
+ case XFS_IOC_HEALTH_MONITOR:
+ return xfs_ioc_health_monitor(mp, arg);
+
default:
return -ENOTTY;
}
@@ -69,6 +69,9 @@ typedef __u32 xfs_nlink_t;
# include <linux/time_stats.h>
#endif
#include <linux/sched/clock.h>
+#ifdef CONFIG_XFS_HEALTH_MONITOR
+# include <linux/thread_with_file.h>
+#endif
#include <asm/page.h>
#include <asm/div64.h>