@@ -4,7 +4,7 @@ xfs_scrub_all \- scrub all mounted XFS filesystems
.SH SYNOPSIS
.B xfs_scrub_all
[
-.B \-hV
+.B \-hxV
]
.SH DESCRIPTION
.B xfs_scrub_all
@@ -21,6 +21,9 @@ the same device simultaneously.
.B \-h
Display help.
.TP
+.B \-x
+Read all file data extents to look for disk errors.
+.TP
.B \-V
Prints the version number and exits.
.SH EXIT CODE
@@ -18,6 +18,8 @@ INSTALL_SCRUB += install-systemd
SYSTEMD_SERVICES=\
xfs_scrub@.service \
xfs_scrub_fail@.service \
+ xfs_scrub_media@.service \
+ xfs_scrub_media_fail@.service \
xfs_scrub_all.service \
xfs_scrub_all.timer \
system-xfs_scrub.slice
@@ -18,6 +18,7 @@ from io import TextIOWrapper
retcode = 0
terminate = False
+scrub_media = False
def DEVNULL():
'''Return /dev/null in subprocess writable format.'''
@@ -111,6 +112,17 @@ def systemd_escape(path):
except:
return path
+def scrub_unitname(mnt):
+	'''Map a mount point (or the '*' glob) to its systemd scrub unit name.'''
+	# '*' is passed through untouched; any other value is escaped first.
+	instance = mnt if mnt == '*' else systemd_escape(mnt)
+
+	# Switch to the media-scan template when the scrub_media global is set.
+	template = 'xfs_scrub@%s'
+	if scrub_media:
+		template = 'xfs_scrub_media@%s'
+	return template % instance
+
def systemctl_stop(unitname):
'''Stop a systemd unit.'''
cmd = ['systemctl', 'stop', unitname]
@@ -163,7 +175,7 @@ def systemctl_start(unitname, killfuncs):
def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs):
'''Run a scrub process.'''
- global retcode, terminate
+ global retcode, terminate, scrub_media
print("Scrubbing %s..." % mnt)
sys.stdout.flush()
@@ -173,7 +185,7 @@ def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs):
return
# Try it the systemd way
- unitname = 'xfs_scrub@%s' % systemd_escape(mnt)
+ unitname = scrub_unitname(mnt)
ret = systemctl_start(unitname, killfuncs)
if ret == 0 or ret == 1:
print("Scrubbing %s done, (err=%d)" % (mnt, ret))
@@ -187,6 +199,8 @@ def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs):
# Invoke xfs_scrub manually
cmd = ['@sbindir@/xfs_scrub']
cmd += '@scrub_args@'.split()
+	if scrub_media:
+		cmd += ['-x']	# must be a list: += with a str would append '-' and 'x' separately
cmd += [mnt]
ret = run_killable(cmd, None, killfuncs, \
lambda proc: proc.terminate())
@@ -213,26 +227,31 @@ def main():
a = (mnt, cond, running_devs, devs, killfuncs)
thr = threading.Thread(target = run_scrub, args = a)
thr.start()
- global retcode, terminate
+ global retcode, terminate, scrub_media
parser = argparse.ArgumentParser( \
description = "Scrub all mounted XFS filesystems.")
parser.add_argument("-V", help = "Report version and exit.", \
action = "store_true")
+ parser.add_argument("-x", help = "Scrub file data after filesystem metadata.", \
+ action = "store_true")
args = parser.parse_args()
if args.V:
print("xfs_scrub_all version @pkg_version@")
sys.exit(0)
+ scrub_media = args.x
+
fs = find_mounts()
# Tail the journal if we ourselves aren't a service...
journalthread = None
if 'SERVICE_MODE' not in os.environ:
try:
+ unitname = scrub_unitname('*')
cmd=['journalctl', '--no-pager', '-q', '-S', 'now', \
- '-f', '-u', 'xfs_scrub@*', '-o', \
+ '-f', '-u', unitname, '-o', \
'cat']
journalthread = subprocess.Popen(cmd)
except:
@@ -9,8 +9,11 @@
recipient="$1"
test -z "${recipient}" && exit 0
-mntpoint="$2"
+service="$2"
+test -z "${service}" && exit 0
+mntpoint="$3"
test -z "${mntpoint}" && exit 0
+
hostname="$(hostname -f 2>/dev/null)"
test -z "${hostname}" && hostname="${HOSTNAME}"
@@ -48,12 +51,12 @@ mntpoint_esc="$(escape_path "${mntpoint}")"
(cat << ENDL
To: $1
-From: <xfs_scrub@${hostname}>
-Subject: xfs_scrub failure on ${mntpoint}
+From: <${service}@${hostname}>
+Subject: ${service} failure on ${mntpoint}
-So sorry, the automatic xfs_scrub of ${mntpoint} on ${hostname} failed.
+So sorry, the automatic ${service} of ${mntpoint} on ${hostname} failed.
A log of what happened follows:
ENDL
-systemctl status --full --lines 4294967295 "xfs_scrub@${mntpoint_esc}") | "${mailer}" -t -i
+systemctl status --full --lines 4294967295 "${service}@${mntpoint_esc}") | "${mailer}" -t -i
exit "${PIPESTATUS[1]}"
@@ -10,7 +10,7 @@ Documentation=man:xfs_scrub(8)
[Service]
Type=oneshot
Environment=EMAIL_ADDR=root
-ExecStart=@pkg_lib_dir@/@pkg_name@/xfs_scrub_fail "${EMAIL_ADDR}" %I
+ExecStart=@pkg_lib_dir@/@pkg_name@/xfs_scrub_fail "${EMAIL_ADDR}" xfs_scrub %I
User=mail
Group=mail
SupplementaryGroups=systemd-journal
new file mode 100644
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+[Unit]
+Description=Online XFS Metadata and Media Check for %I
+OnFailure=xfs_scrub_media_fail@%i.service
+Documentation=man:xfs_scrub(8)
+
+[Service]
+Type=oneshot
+Environment=SERVICE_MODE=1
+Environment=SERVICE_MOUNTPOINT=/tmp/scrub
+ExecStart=@sbindir@/xfs_scrub @scrub_args@ -x %I
+SyslogIdentifier=%N
+
+# Run scrub with minimal CPU and IO priority so that nothing else will starve.
+IOSchedulingClass=idle
+CPUSchedulingPolicy=idle
+CPUAccounting=true
+Nice=19
+
+# Create the service underneath the scrub background service slice so that we
+# can control resource usage.
+Slice=system-xfs_scrub.slice
+
+# No realtime CPU scheduling
+RestrictRealtime=true
+
+# Dynamically create a user that isn't root
+DynamicUser=true
+
+# Make the entire filesystem readonly and /home inaccessible, then bind mount
+# the filesystem we're supposed to be checking into our private /tmp dir.
+# 'norbind' means that we don't bind anything under that original mount.
+ProtectSystem=strict
+ProtectHome=yes
+PrivateTmp=true
+BindPaths=/%I:/tmp/scrub:norbind
+
+# Don't let scrub complain about paths in /etc/projects that have been hidden
+# by our sandboxing. scrub doesn't care about project ids anyway.
+InaccessiblePaths=-/etc/projects
+
+# No network access
+PrivateNetwork=true
+ProtectHostname=true
+RestrictAddressFamilies=none
+IPAddressDeny=any
+
+# Don't let the program mess with the kernel configuration at all
+ProtectKernelLogs=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectControlGroups=true
+ProtectProc=invisible
+RestrictNamespaces=true
+
+# Hide everything in /proc, even /proc/mounts
+ProcSubset=pid
+
+# Only allow the default personality Linux
+LockPersonality=true
+
+# No writable memory pages
+MemoryDenyWriteExecute=true
+
+# Don't let our mounts leak out to the host
+PrivateMounts=true
+
+# Restrict system calls to the native arch and only enough to get things going
+SystemCallArchitectures=native
+SystemCallFilter=@system-service
+SystemCallFilter=~@privileged
+SystemCallFilter=~@resources
+SystemCallFilter=~@mount
+
+# xfs_scrub needs these privileges to run, and no others
+CapabilityBoundingSet=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
+AmbientCapabilities=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
+NoNewPrivileges=true
+
+# xfs_scrub doesn't create files
+UMask=7777
+
+# No access to hardware /dev files except for block devices
+ProtectClock=true
+DevicePolicy=closed
+DeviceAllow=block-*
new file mode 100644
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+[Unit]
+Description=Online XFS Metadata and Media Check Failure Reporting for %I
+Documentation=man:xfs_scrub(8)
+
+[Service]
+Type=oneshot
+Environment=EMAIL_ADDR=root
+ExecStart=@pkg_lib_dir@/@pkg_name@/xfs_scrub_fail "${EMAIL_ADDR}" xfs_scrub_media %I
+User=mail
+Group=mail
+SupplementaryGroups=systemd-journal
+
+# Create the service underneath the scrub background service slice so that we
+# can control resource usage.
+Slice=system-xfs_scrub.slice
+
+# No realtime scheduling
+RestrictRealtime=true
+
+# Make /usr, /boot, /efi, and /etc read-only and /home inaccessible, and give
+# the failure reporting script its own private /tmp directory.
+ProtectSystem=full
+ProtectHome=yes
+PrivateTmp=true
+RestrictSUIDSGID=true
+
+# Emailing reports requires network access, but not the ability to change the
+# hostname.
+ProtectHostname=true
+
+# Don't let the program mess with the kernel configuration at all
+ProtectKernelLogs=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectControlGroups=true
+ProtectProc=invisible
+RestrictNamespaces=true
+
+# Can't hide /proc because journalctl needs it to find various pieces of log
+# information
+#ProcSubset=pid
+
+# Only allow the default personality Linux
+LockPersonality=true
+
+# No writable memory pages
+MemoryDenyWriteExecute=true
+
+# Don't let our mounts leak out to the host
+PrivateMounts=true
+
+# Restrict system calls to the native arch and only enough to get things going
+SystemCallArchitectures=native
+SystemCallFilter=@system-service
+SystemCallFilter=~@privileged
+SystemCallFilter=~@resources
+SystemCallFilter=~@mount
+
+# Failure reporting does not need any capabilities at all
+CapabilityBoundingSet=
+NoNewPrivileges=true
+
+# Failure reporting shouldn't create world-readable files
+UMask=0077
+
+# Clean up any IPC objects when this unit stops
+RemoveIPC=true
+
+# No access to hardware device files
+PrivateDevices=true
+ProtectClock=true