@@ -18,6 +18,7 @@ XFS_SCRUB_ALL_PROG = xfs_scrub_all
XFS_SCRUB_FAIL_PROG = xfs_scrub_fail
XFS_SCRUB_ARGS = -p
XFS_SCRUB_SERVICE_ARGS = -b -o autofsck
+XFS_SCRUBBED_PROG = xfs_scrubbed
ifeq ($(HAVE_SYSTEMD),yes)
INSTALL_SCRUB += install-systemd
SYSTEMD_SERVICES=\
@@ -108,9 +109,9 @@ endif
# Automatically trigger a media scan once per month
XFS_SCRUB_ALL_AUTO_MEDIA_SCAN_INTERVAL=1mo
-LDIRT = $(XFS_SCRUB_ALL_PROG) $(XFS_SCRUB_FAIL_PROG) *.service *.cron
+LDIRT = $(XFS_SCRUB_ALL_PROG) $(XFS_SCRUB_FAIL_PROG) $(XFS_SCRUBBED_PROG) *.service *.cron
-default: depend $(LTCOMMAND) $(XFS_SCRUB_ALL_PROG) $(XFS_SCRUB_FAIL_PROG) $(OPTIONAL_TARGETS)
+default: depend $(LTCOMMAND) $(XFS_SCRUB_ALL_PROG) $(XFS_SCRUB_FAIL_PROG) $(XFS_SCRUBBED_PROG) $(OPTIONAL_TARGETS)
xfs_scrub_all: xfs_scrub_all.in $(builddefs)
@echo " [SED] $@"
@@ -123,6 +124,14 @@ xfs_scrub_all: xfs_scrub_all.in $(builddefs)
-e "s|@scrub_args@|$(XFS_SCRUB_ARGS)|g" < $< > $@
$(Q)chmod a+x $@
+xfs_scrubbed: xfs_scrubbed.in $(builddefs) # fill in the .in template
+	@echo " [SED] $@"
+	$(Q)$(SED) -e "s|@sbindir@|$(PKG_SBIN_DIR)|g" \
+		-e "s|@scrub_svcname@|$(scrub_svcname)|g" \
+		-e "s|@pkg_version@|$(PKG_VERSION)|g" \
+		< $< > $@
+	$(Q)chmod a+x $@
+
xfs_scrub_fail: xfs_scrub_fail.in $(builddefs)
@echo " [SED] $@"
$(Q)$(SED) -e "s|@sbindir@|$(PKG_SBIN_DIR)|g" \
@@ -165,6 +174,8 @@ install-scrub: default
$(INSTALL) -m 755 -d $(PKG_SBIN_DIR)
$(LTINSTALL) -m 755 $(LTCOMMAND) $(PKG_SBIN_DIR)
$(INSTALL) -m 755 $(XFS_SCRUB_ALL_PROG) $(PKG_SBIN_DIR)
+ $(INSTALL) -m 755 -d $(PKG_LIBEXEC_DIR)
+ $(INSTALL) -m 755 $(XFS_SCRUBBED_PROG) $(PKG_LIBEXEC_DIR)
$(INSTALL) -m 755 -d $(PKG_STATE_DIR)
install-udev: $(UDEV_RULES)
new file mode 100644
@@ -0,0 +1,287 @@
+#!/usr/bin/python3
+
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2024-2025 Oracle. All rights reserved.
+#
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+# Daemon to listen for and react to filesystem health events
+
+import sys
+import os
+import argparse
+import fcntl
+import json
+import datetime
+import errno
+import ctypes
+import gc
+from concurrent.futures import ProcessPoolExecutor
+
+debug = False          # --debug: print raw input lines and assembled events
+log = False            # --log: print every event to stdout
+everything = False     # --everything: set XFS_HEALTH_MONITOR_VERBOSE
+debug_fast = False     # --debug-fast: do not cap the worker pool at one
+printf_prefix = ''     # message prefix; main() sets it to the mountpoint
+
+# ioctl command number layout: nr, type, size and direction bit fields
+_IOC_NRBITS = 8
+_IOC_TYPEBITS = 8
+_IOC_SIZEBITS = 14
+_IOC_DIRBITS = 2
+
+_IOC_NRMASK = (1 << _IOC_NRBITS) - 1    # largest value each field can hold
+_IOC_TYPEMASK = (1 << _IOC_TYPEBITS) - 1
+_IOC_SIZEMASK = (1 << _IOC_SIZEBITS) - 1
+_IOC_DIRMASK = (1 << _IOC_DIRBITS) - 1
+
+_IOC_NRSHIFT = 0                        # fields are packed upward from bit 0
+_IOC_TYPESHIFT = (_IOC_NRSHIFT + _IOC_NRBITS)
+_IOC_SIZESHIFT = (_IOC_TYPESHIFT + _IOC_TYPEBITS)
+_IOC_DIRSHIFT = (_IOC_SIZESHIFT + _IOC_SIZEBITS)
+
+_IOC_NONE = 0                           # direction field values
+_IOC_WRITE = 1
+_IOC_READ = 2
+
+def _IOC(direction, type, nr, t):
+    '''Pack an ioctl command number; the size field is sizeof(t).'''
+    assert direction <= _IOC_DIRMASK, direction
+    assert type <= _IOC_TYPEMASK, type
+    assert nr <= _IOC_NRMASK, nr
+    nbytes = ctypes.sizeof(t)
+    assert nbytes <= _IOC_SIZEMASK, nbytes
+
+    cmd = direction << _IOC_DIRSHIFT
+    cmd |= nbytes << _IOC_SIZESHIFT
+    cmd |= type << _IOC_TYPESHIFT
+    return cmd | (nr << _IOC_NRSHIFT)
+
+def _IOR(type, number, size):  # size: a ctypes type; _IOC encodes its sizeof()
+ return _IOC(_IOC_READ, type, number, size)
+
+def _IOW(type, number, size):  # as _IOR, but with the write direction bit
+ return _IOC(_IOC_WRITE, type, number, size)
+
+def _IOWR(type, number, size): # as _IOR, but with both direction bits set
+ return _IOC(_IOC_READ | _IOC_WRITE, type, number, size)
+
+# Constants for the XFS health monitoring ioctl argument
+XFS_HEALTH_MONITOR_FMT_JSON = 1        # xfs_health_monitor.format value
+XFS_HEALTH_MONITOR_VERBOSE = 1 << 0    # xfs_health_monitor.flags bit
+
+class xfs_health_monitor(ctypes.Structure):
+    '''Argument block handed to the XFS_IOC_HEALTH_MONITOR ioctl.'''
+    _fields_ = [('flags', ctypes.c_ulonglong),
+                ('format', ctypes.c_ubyte),
+                ('_pad0', ctypes.c_ubyte * 7),
+                ('_pad1', ctypes.c_ulonglong * 2)]
+# Guard against accidental layout changes: the size must stay 32 bytes.
+assert ctypes.sizeof(xfs_health_monitor) == 32
+
+XFS_IOC_HEALTH_MONITOR = _IOW(0x58, 68, xfs_health_monitor)  # type 0x58 ('X'), nr 68
+
+def open_health_monitor(fd, verbose = False):
+    '''Start health monitoring on the filesystem open at fd.
+
+    Issues XFS_IOC_HEALTH_MONITOR asking for JSON-formatted events and
+    returns the ioctl's result, which callers use as a new file
+    descriptor.  verbose additionally sets XFS_HEALTH_MONITOR_VERBOSE.'''
+    req = xfs_health_monitor()
+    req.format = XFS_HEALTH_MONITOR_FMT_JSON
+    if verbose:
+        req.flags |= XFS_HEALTH_MONITOR_VERBOSE
+    return fcntl.ioctl(fd, XFS_IOC_HEALTH_MONITOR, req)
+
+# main program
+
+def health_reports(mon_fp):
+    '''Generate python objects describing health events.
+
+    Lines are read from mon_fp until readline() returns an empty string.
+    Each line is split on NUL and stripped; non-empty pieces accumulate
+    until one of them contains a closing brace, then the joined text is
+    parsed as JSON.  Text that fails to parse is reported and dropped.'''
+    global debug
+    global printf_prefix
+
+    pending = []
+    for buf in iter(mon_fp.readline, ''):
+        for piece in buf.split('\0'):
+            piece = piece.strip()
+            if debug:
+                print(f'new line: {piece}')
+            if piece == '':
+                continue
+            pending.append(piece)
+            if '}' not in piece:
+                continue
+
+            # A closing brace ends the event; start afresh for the next.
+            text, pending = ''.join(pending), []
+            if debug:
+                print(f'new event: {text}')
+            try:
+                yield json.loads(text)
+            except json.decoder.JSONDecodeError as e:
+                print(f"{printf_prefix}: {e} from {text}", file = sys.stderr)
+
+def log_event(event):
+    '''Write the event to stdout, prefixed with printf_prefix, and flush
+    right away.'''
+    global printf_prefix
+
+    print(f"{printf_prefix}: {event}", flush = True)
+
+def report_lost(event):
+    '''Report that the kernel lost events.  The event itself is not
+    examined.'''
+    global printf_prefix
+
+    print(f"{printf_prefix}: Events were lost.", flush = True)
+
+def report_shutdown(event):
+    '''Report an abortive shutdown of the filesystem.
+
+    Codes listed in event['reasons'] are shown as readable phrases when
+    known and verbatim otherwise.'''
+    global printf_prefix
+
+    REASONS = {
+        "meta_ioerr": "metadata IO error",
+        "log_ioerr": "log IO error",
+        "force_umount": "forced unmount",
+        "corrupt_incore": "in-memory state corruption",
+        "corrupt_ondisk": "ondisk metadata corruption",
+        "device_removed": "device removal",
+    }
+
+    # Unknown codes fall through unchanged.
+    phrases = [REASONS.get(code, code) for code in event['reasons']]
+    causes = ', '.join(phrases)
+    print(f"{printf_prefix}: Filesystem shut down due to {causes}.",
+            flush = True)
+
+def handle_event(event):
+    '''Handle an event asynchronously.
+
+    The time_ns field is replaced by a readable local 'time' string when
+    possible.  The event is logged if logging is on, and 'lost' and
+    'shutdown' events are reported.'''
+    global log
+    try:
+        when = datetime.datetime.fromtimestamp(event['time_ns'] / 1e9)
+        event['time'] = str(when.astimezone())
+        del event['time_ns']
+    except Exception as e:
+        # A timestamp we cannot format is not fatal, but say so loudly.
+        print(f'{printf_prefix}: bad timestamp: {e}', file = sys.stderr)
+
+    if log:
+        log_event(event)
+    kind = event['type']
+    if kind == 'lost':
+        report_lost(event)
+    elif kind == 'shutdown':
+        report_shutdown(event)
+
+def monitor(mountpoint, event_queue, **kwargs):
+    '''Monitor the given mountpoint for health events.
+
+    Returns 1 if health monitoring is unsupported, 0 when the stream ends.
+    Extra keyword arguments are accepted and ignored.'''
+    global everything
+
+    def check_result(future):
+        # Handler failures would otherwise vanish inside the Future.
+        err = None if future.cancelled() else future.exception()
+        if err is not None:
+            print(f"{mountpoint}: handler error: {err!r}", file = sys.stderr)
+
+    fd = os.open(mountpoint, os.O_RDONLY)
+    try:
+        mon_fd = open_health_monitor(fd, verbose = everything)
+    except OSError as e:
+        if e.errno not in (errno.ENOTTY, errno.EOPNOTSUPP):
+            raise
+        print(f"{mountpoint}: XFS health monitoring not supported.",
+                file = sys.stderr)
+        return 1
+    finally:
+        # Close the mountpoint fd whether or not the monitor was opened.
+        os.close(fd)
+
+    # mon_fp takes ownership of mon_fd and closes it when the block exits.
+    with os.fdopen(mon_fd) as mon_fp:
+        for nr, e in enumerate(health_reports(mon_fp)):
+            event_queue.submit(handle_event, e).add_done_callback(check_result)
+            # Collect garbage now and then to bound memory held by Futures.
+            if nr % 5355 == 0:
+                gc.collect()
+    return 0
+
+def main():
+    '''Parse the command line and monitor one mountpoint for health events.
+
+    Returns 0 after -V or an interrupt; otherwise the value returned by
+    monitor().'''
+    global debug
+    global log
+    global printf_prefix
+    global everything
+    global debug_fast
+    import multiprocessing
+
+    parser = argparse.ArgumentParser(
+            description = "XFS filesystem health monitoring daemon.")
+    parser.add_argument("--debug", help = "Enable debugging messages.",
+            action = "store_true")
+    parser.add_argument("--log", help = "Log health events to stdout.",
+            action = "store_true")
+    parser.add_argument("--everything", help = "Capture all events.",
+            action = "store_true")
+    parser.add_argument("-V", help = "Report version and exit.",
+            action = "store_true")
+    parser.add_argument('mountpoint', default = None, nargs = '?',
+            help = 'XFS filesystem mountpoint to target.')
+    parser.add_argument('--debug-fast', action = 'store_true',
+            help = argparse.SUPPRESS)
+    args = parser.parse_args()
+
+    if args.V:
+        print("xfs_scrubbed version @pkg_version@")
+        return 0
+
+    if args.mountpoint is None:
+        # parser.error() reports the problem and exits; it never returns.
+        parser.error("the following arguments are required: mountpoint")
+
+    debug = args.debug
+    log = args.log
+    everything = args.everything
+    debug_fast = args.debug_fast
+    printf_prefix = args.mountpoint
+
+    # Handle events in a separate process so that the event reader never
+    # waits on the handler's GIL; the price is that everything submitted
+    # must be pickleable, the gain is not losing events.  The handlers
+    # read the globals set above, so the workers must be forked copies of
+    # this process rather than fresh interpreters.  Normally one worker
+    # keeps the service footprint small; the hidden --debug-fast option
+    # is assumed to be QA and uses all CPUs.
+    args.event_queue = ProcessPoolExecutor(
+            max_workers = None if debug_fast else 1,
+            mp_context = multiprocessing.get_context('fork'))
+
+    ret = 0
+    try:
+        ret = monitor(**vars(args))
+    except KeyboardInterrupt:
+        # Consider SIGINT to be a clean exit.
+        pass
+    finally:
+        args.event_queue.shutdown()
+
+    return ret
+
+if __name__ == '__main__':
+ sys.exit(main())