@@ -29,8 +29,16 @@ SYSTEMD_SERVICES=\
xfs_scrub_all.service \
xfs_scrub_all_fail.service \
xfs_scrub_all.timer \
- system-xfs_scrub.slice
+ system-xfs_scrub.slice \
+ xfs_scrubbed@.service
OPTIONAL_TARGETS += $(SYSTEMD_SERVICES)
+
+ifeq ($(HAVE_UDEV),yes)
+ XFS_SCRUBBED_HELPER = xfs_scrubbed_start
+ XFS_SCRUBBED_UDEV_RULES = xfs_scrubbed.rules
+ OPTIONAL_TARGETS += $(XFS_SCRUBBED_HELPER)
+ INSTALL_SCRUB += install-udev-scrubbed
+endif
endif
ifeq ($(HAVE_CROND),yes)
INSTALL_SCRUB += install-crond
@@ -185,6 +193,16 @@ install-udev: $(UDEV_RULES)
$(INSTALL) -m 644 $$i $(UDEV_RULE_DIR)/64-$$i; \
done
+# Install the xfs_scrubbed udev helper into $(UDEV_DIR) and every xfs_scrubbed
+# udev rules file into $(UDEV_RULE_DIR), prefixed with "64-" like install-udev.
+install-udev-scrubbed: $(XFS_SCRUBBED_HELPER) $(XFS_SCRUBBED_UDEV_RULES)
+	$(INSTALL) -m 755 -d $(UDEV_DIR)
+	$(INSTALL) -m 755 $(XFS_SCRUBBED_HELPER) $(UDEV_DIR)
+	$(INSTALL) -m 755 -d $(UDEV_RULE_DIR)
+	for i in $(XFS_SCRUBBED_UDEV_RULES); do \
+		$(INSTALL) -m 644 $$i $(UDEV_RULE_DIR)/64-$$i; \
+	done
+
install-dev:
-include .dep
@@ -19,6 +19,7 @@ import gc
from concurrent.futures import ProcessPoolExecutor
import ctypes.util
import collections
+import time
try:
# Not all systems will have this json schema validation libarary,
@@ -994,6 +995,14 @@ def main():
pass
args.event_queue.shutdown()
+
+ # See the service mode comments in xfs_scrub.c for why we sleep and
+ # compress all nonzero exit codes to 1.
+ if 'SERVICE_MODE' in os.environ:
+ time.sleep(2)
+ if ret != 0:
+ ret = 1
+
return ret
if __name__ == '__main__':
new file mode 100644
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (c) 2024-2025 Oracle. All rights reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+#
+# Start autonomous self healing whenever an XFS filesystem is mounted
+ACTION=="add", SUBSYSTEM=="xfs", ENV{TYPE}=="mount", RUN+="xfs_scrubbed_start"
new file mode 100644
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (c) 2024-2025 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+[Unit]
+Description=Self Healing of XFS Metadata for %f
+Documentation=man:xfs_scrubbed(8)
+
+# Explicitly require the capabilities that this program needs
+ConditionCapability=CAP_SYS_ADMIN
+ConditionCapability=CAP_DAC_OVERRIDE
+
+# Must be a mountpoint
+ConditionPathIsMountPoint=%f
+RequiresMountsFor=%f
+
+[Service]
+Type=exec
+Environment=SERVICE_MODE=1
+ExecStart=@pkg_libexec_dir@/xfs_scrubbed --log %f
+SyslogIdentifier=%N
+
+# Run scrub with minimal CPU and IO priority so that nothing else will starve.
+IOSchedulingClass=idle
+CPUSchedulingPolicy=idle
+CPUAccounting=true
+Nice=19
+
+# Create the service underneath the scrub background service slice so that we
+# can control resource usage.
+Slice=system-xfs_scrub.slice
+
+# No realtime CPU scheduling
+RestrictRealtime=true
+
+# Dynamically create a user that isn't root
+DynamicUser=true
+
+# Make the entire filesystem readonly, but don't hide /home and don't use a
+# private bind mount like xfs_scrub. We don't want to pin the filesystem,
+# because we want umount to work correctly and this service to stop
+# automatically.
+ProtectSystem=strict
+ProtectHome=no
+PrivateTmp=true
+PrivateDevices=true
+
+# Don't let scrub complain about paths in /etc/projects that have been hidden
+# by our sandboxing. scrub doesn't care about project ids anyway.
+InaccessiblePaths=-/etc/projects
+
+# No network access
+PrivateNetwork=true
+ProtectHostname=true
+RestrictAddressFamilies=none
+IPAddressDeny=any
+
+# Don't let the program mess with the kernel configuration at all
+ProtectKernelLogs=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectControlGroups=true
+ProtectProc=invisible
+RestrictNamespaces=true
+
+# Hide everything in /proc, even /proc/mounts
+ProcSubset=pid
+
+# Lock the process to the default Linux personality
+LockPersonality=true
+
+# No writable memory pages
+MemoryDenyWriteExecute=true
+
+# Don't let our mounts leak out to the host
+PrivateMounts=true
+
+# Restrict system calls to the native arch and only enough to get things going
+SystemCallArchitectures=native
+SystemCallFilter=@system-service
+SystemCallFilter=~@privileged
+SystemCallFilter=~@resources
+SystemCallFilter=~@mount
+
+# xfs_scrubbed needs these privileges to open the rootdir and monitor
+CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE
+AmbientCapabilities=CAP_SYS_ADMIN CAP_DAC_OVERRIDE
+NoNewPrivileges=true
+
+# xfs_scrubbed doesn't create files
+UMask=7777
+
+# No access to hardware /dev files; only the standard pseudo devices
+ProtectClock=true
+DevicePolicy=closed
+
+[Install]
+WantedBy=multi-user.target
+# If someone tries to enable the template itself, translate that into enabling
+# this service on the root directory at systemd startup time. In the
+# initramfs, the udev rules in xfs_scrubbed.rules run before systemd starts.
+DefaultInstance=-
new file mode 100755
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (c) 2024-2025 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+# Start the xfs_scrubbed service when the filesystem is mounted.
+#
+# $SOURCE (presumably supplied in the udev event environment) is compared
+# against the source field of each /proc/mounts entry, and the service
+# instance for the first matching mount point that systemctl accepts is started.
+
+# Nothing to do without systemctl; don't echo its path into the udev log.
+command -v systemctl >/dev/null 2>&1 || exit 0
+
+# An empty SOURCE cannot identify a filesystem.
+[ -n "$SOURCE" ] || exit 0
+
+# Compare the field literally instead of pasting $SOURCE into a regex, and
+# read with -r so the octal escapes used by /proc/mounts (e.g. \040 for a
+# space) survive and can be decoded before the unit name is built.
+while read -r source mntpt therest; do
+	[ "$source" = "$SOURCE" ] || continue
+	mntpt="$(printf '%b' "$mntpt")"
+	inst="$(systemd-escape --path "$mntpt")"
+	systemctl restart --no-block "xfs_scrubbed@$inst" && break
+done < /proc/mounts
+
+exit 0