@@ -18,6 +18,7 @@ XFS_SCRUB_ALL_PROG = xfs_scrub_all
XFS_SCRUB_FAIL_PROG = xfs_scrub_fail
XFS_SCRUB_ARGS = -p
XFS_SCRUB_SERVICE_ARGS = -b
+XFS_SCRUBBED_PROG = xfs_scrubbed
ifeq ($(HAVE_SYSTEMD),yes)
INSTALL_SCRUB += install-systemd
SYSTEMD_SERVICES=\
@@ -124,9 +125,9 @@ endif
# Automatically trigger a media scan once per month
XFS_SCRUB_ALL_AUTO_MEDIA_SCAN_INTERVAL=1mo
-LDIRT = $(XFS_SCRUB_ALL_PROG) $(XFS_SCRUB_FAIL_PROG) *.service *.cron
+LDIRT = $(XFS_SCRUB_ALL_PROG) $(XFS_SCRUB_FAIL_PROG) $(XFS_SCRUBBED_PROG) *.service *.cron
-default: depend $(LTCOMMAND) $(XFS_SCRUB_ALL_PROG) $(XFS_SCRUB_FAIL_PROG) $(OPTIONAL_TARGETS)
+default: depend $(LTCOMMAND) $(XFS_SCRUB_ALL_PROG) $(XFS_SCRUB_FAIL_PROG) $(XFS_SCRUBBED_PROG) $(OPTIONAL_TARGETS)
xfs_scrub_all: xfs_scrub_all.in $(builddefs)
@echo " [SED] $@"
@@ -139,6 +140,14 @@ xfs_scrub_all: xfs_scrub_all.in $(builddefs)
-e "s|@scrub_args@|$(XFS_SCRUB_ARGS)|g" < $< > $@
$(Q)chmod a+x $@
+xfs_scrubbed: xfs_scrubbed.in $(builddefs)
+ @echo " [SED] $@"
+ $(Q)$(SED) -e "s|@sbindir@|$(PKG_SBIN_DIR)|g" \
+ -e "s|@scrub_svcname@|$(scrub_svcname)|g" \
+ -e "s|@pkg_version@|$(PKG_VERSION)|g" \
+ < $< > $@
+ $(Q)chmod a+x $@
+
xfs_scrub_fail: xfs_scrub_fail.in $(builddefs)
@echo " [SED] $@"
$(Q)$(SED) -e "s|@sbindir@|$(PKG_SBIN_DIR)|g" \
@@ -182,6 +191,8 @@ install-scrub: default
$(INSTALL) -m 755 -d $(PKG_SBIN_DIR)
$(LTINSTALL) -m 755 $(LTCOMMAND) $(PKG_SBIN_DIR)
$(INSTALL) -m 755 $(XFS_SCRUB_ALL_PROG) $(PKG_SBIN_DIR)
+ $(INSTALL) -m 755 -d $(PKG_LIBEXEC_DIR)
+ $(INSTALL) -m 755 $(XFS_SCRUBBED_PROG) $(PKG_LIBEXEC_DIR)
$(INSTALL) -m 755 -d $(PKG_STATE_DIR)
install-udev: $(UDEV_RULES)
new file mode 100644
@@ -0,0 +1,217 @@
+#!/usr/bin/python3
+
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2024 Oracle. All rights reserved.
+#
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+# Daemon to listen for and react to filesystem health events
+
+import sys
+import os
+import argparse
+import fcntl
+import struct
+import json
+import datetime
+import errno
+
+debug = False
+log = False
+everything = False
+printf_prefix = ''
+
+# ioctl encoding stuff
+_IOC_NRBITS = 8
+_IOC_TYPEBITS = 8
+_IOC_SIZEBITS = 14
+_IOC_DIRBITS = 2
+
+_IOC_NRSHIFT = 0
+_IOC_TYPESHIFT = (_IOC_NRSHIFT + _IOC_NRBITS)
+_IOC_SIZESHIFT = (_IOC_TYPESHIFT + _IOC_TYPEBITS)
+_IOC_DIRSHIFT = (_IOC_SIZESHIFT + _IOC_SIZEBITS)
+
+_IOC_NONE = 0
+_IOC_WRITE = 1
+_IOC_READ = 2
+
+def _IOC(direction, type, nr, size):
+ return (((direction) << _IOC_DIRSHIFT) |
+ ((type) << _IOC_TYPESHIFT) |
+ ((nr) << _IOC_NRSHIFT) |
+ ((size) << _IOC_SIZESHIFT))
+
+def _IOR(type, number, size):
+ return _IOC(_IOC_READ, type, number, size)
+
+# xfs health monitoring ioctl stuff
+XFS_HEALTH_MONITOR_FMT_JSON = 1
+XFS_HEALTH_MONITOR_VERBOSE = 1 << 0
+xfs_health_monitor = struct.Struct('QB' + ('x' * 23))
+XFS_IOC_HEALTH_MONITOR = _IOR(0x58, 48, xfs_health_monitor.size)
+
+def open_health_monitor(fd, verbose = False):
+ '''Return a health monitoring fd.'''
+ assert xfs_health_monitor.size == 32
+
+ flags = 0
+ fmt = XFS_HEALTH_MONITOR_FMT_JSON
+
+ if verbose:
+ flags |= XFS_HEALTH_MONITOR_VERBOSE
+
+ # Create an immutable byte array representation of struct args, then
+ # pass it to the ioctl function as a mutable byte array so that the
+ # return value is the kernel fd and /not/ the post-syscall byte array
+ # contents.
+ arg = xfs_health_monitor.pack(flags, fmt)
+ ret = fcntl.ioctl(fd, XFS_IOC_HEALTH_MONITOR, bytearray(arg))
+ return ret
+
+# main program
+
+def health_reports(mon_fp):
+ '''Generate python objects describing health events.'''
+ global debug, printf_prefix
+
+ lines = []
+ buf = mon_fp.readline()
+ while buf != '':
+ for line in buf.split('\0'):
+ line = line.strip()
+ if debug:
+ print(f'new line: {line}')
+ if line == '':
+ continue
+
+ lines.append(line)
+ if not '}' in line:
+ continue
+
+ s = ''.join(lines)
+ if debug:
+ print(f'new event: {s}')
+ try:
+ yield json.loads(s)
+ except json.decoder.JSONDecodeError as e:
+ print(f"{printf_prefix}: {e} from {s}",
+ file = sys.stderr)
+ pass
+ lines = []
+ buf = mon_fp.readline()
+
+def log_event(event):
+ global printf_prefix
+
+ print(f"{printf_prefix}: {event}")
+ sys.stdout.flush()
+
+def report_lost(event):
+ '''Report that the kernel lost events.'''
+ global printf_prefix
+
+ print(f"{printf_prefix}: Events were lost.")
+ sys.stdout.flush()
+
+def report_shutdown(event):
+ '''Report an abortive shutdown of the filesystem.'''
+ global printf_prefix
+ REASONS = {
+ "meta_ioerr": "metadata IO error",
+ "log_ioerr": "log IO error",
+ "force_umount": "forced unmount",
+ "corrupt_incore": "in-memory state corruption",
+ "corrupt_ondisk": "ondisk metadata corruption",
+ "device_removed": "device removal",
+ }
+
+ reasons = []
+ for reason in event['reasons']:
+ if reason in REASONS:
+ reasons.append(REASONS[reason])
+ else:
+ reasons.append(reason)
+
+ print(f"{printf_prefix}: Filesystem shut down due to {', '.join(reasons)}.")
+ sys.stdout.flush()
+
+def monitor(mountpoint):
+ '''Monitor the given mountpoint for health events.'''
+ global log, everything
+
+ fd = os.open(mountpoint, os.O_RDONLY)
+ try:
+ mon_fd = open_health_monitor(fd, verbose = everything)
+ except OSError as e:
+ if e.errno != errno.ENOTTY:
+ raise e
+ print(f"{mountpoint}: XFS health monitoring not supported.",
+ file = sys.stderr)
+ return 1
+ finally:
+ # Close the mountpoint if opening the health monitor fails
+ os.close(fd)
+
+ # Ownership of mon_fd (and hence responsibility for closing it) is
+ # transferred to the mon_fp object.
+ with os.fdopen(mon_fd) as mon_fp:
+ for event in health_reports(mon_fp):
+ try:
+ ts = datetime.datetime.fromtimestamp(event['time_ns'] / 1e9).astimezone()
+ event['time'] = str(ts)
+ del event['time_ns']
+ except Exception as e:
+ print(e)
+ pass
+ if log:
+ log_event(event)
+ if event['type'] == 'lost':
+ report_lost(event)
+ elif event['type'] == 'shutdown':
+ report_shutdown(event)
+
+ return 0
+
+def main():
+ global debug, log, printf_prefix, everything
+ ret = 0
+
+ parser = argparse.ArgumentParser( \
+ description = "XFS filesystem health monitoring demon.")
+ parser.add_argument("--debug", help = "Enabling debugging messages.", \
+ action = "store_true")
+ parser.add_argument("--log", help = "Log health events to stdout.", \
+ action = "store_true")
+ parser.add_argument("--everything", help = "Capture all events.", \
+ action = "store_true")
+ parser.add_argument("-V", help = "Report version and exit.", \
+ action = "store_true")
+ parser.add_argument('mountpoint', default = None, nargs = '?',
+ help = 'XFS filesystem mountpoint to target.')
+ args = parser.parse_args()
+
+ if args.V:
+ print("xfs_scrubbed version @pkg_version@")
+ sys.exit(0)
+
+ if args.mountpoint is None:
+ parser.error("the following arguments are required: mountpoint")
+ sys.exit(1)
+
+ if args.debug:
+ debug = True
+ if args.log:
+ log = True
+ if args.everything:
+ everything = True
+
+ printf_prefix = args.mountpoint
+ try:
+ ret = monitor(args.mountpoint)
+ except KeyboardInterrupt:
+ ret = 0
+ sys.exit(ret)
+
+if __name__ == '__main__':
+ main()