@@ -85,6 +85,23 @@ config XFS_ONLINE_SCRUB
If unsure, say N.
+config XFS_ONLINE_REPAIR
+ bool "XFS online metadata repair support"
+ default n
+ depends on XFS_FS && XFS_ONLINE_SCRUB
+ help
+ If you say Y here you will be able to repair metadata on a
+ mounted XFS filesystem. This feature is intended to reduce
+ filesystem downtime even further by fixing minor problems
+ before they cause the filesystem to go down. However, it
+ requires that the filesystem be formatted with secondary
+ metadata, such as reverse mappings and inode parent pointers.
+
+ This feature is considered EXPERIMENTAL. Use with caution!
+
+ See the xfs_scrub man page in section 8 for additional information.
+
+ If unsure, say N.
config XFS_WARN
bool "XFS Verbose Warnings"
depends on XFS_FS && !XFS_DEBUG
@@ -170,4 +170,11 @@ xfs-y += $(addprefix scrub/, \
xfs-$(CONFIG_XFS_RT) += scrub/rtbitmap.o
xfs-$(CONFIG_XFS_QUOTA) += scrub/quota.o
+
+# online repair
+ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
+xfs-y += $(addprefix scrub/, \
+ repair.o \
+ )
+endif
endif
@@ -65,7 +65,8 @@
#define XFS_ERRTAG_LOG_BAD_CRC 29
#define XFS_ERRTAG_LOG_ITEM_PIN 30
#define XFS_ERRTAG_BUF_LRU_REF 31
-#define XFS_ERRTAG_MAX 32
+#define XFS_ERRTAG_FORCE_SCRUB_REPAIR 32
+#define XFS_ERRTAG_MAX 33
/*
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -102,5 +103,6 @@
#define XFS_RANDOM_LOG_BAD_CRC 1
#define XFS_RANDOM_LOG_ITEM_PIN 1
#define XFS_RANDOM_BUF_LRU_REF 2
+#define XFS_RANDOM_FORCE_SCRUB_REPAIR 1
#endif /* __XFS_ERRORTAG_H_ */
new file mode 100644
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2018 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_extent_busy.h"
+#include "xfs_ag_resv.h"
+#include "xfs_trans_space.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+
+/*
+ * Repair probe -- userspace uses this to probe if we're willing to repair a
+ * given mountpoint.
+ */
+int
+xfs_repair_probe(
+ struct xfs_scrub_context *sc,
+ uint32_t scrub_oflags)
+{
+ int error = 0;
+
+ if (xfs_scrub_should_terminate(sc, &error))
+ return error;
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2018 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __XFS_SCRUB_REPAIR_H__
+#define __XFS_SCRUB_REPAIR_H__
+
+#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
+
+/* Online repair only works for v5 filesystems. */
+static inline bool xfs_repair_can_fix(struct xfs_mount *mp)
+{
+ return xfs_sb_version_hascrc(&mp->m_sb);
+}
+
+/* Did userspace want us to repair /and/ we found something to fix? */
+static inline bool xfs_repair_should_fix(struct xfs_scrub_metadata *sm)
+{
+ return (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
+ (sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
+ XFS_SCRUB_OFLAG_XCORRUPT |
+ XFS_SCRUB_OFLAG_PREEN));
+}
+
+int xfs_repair_probe(struct xfs_scrub_context *sc, uint32_t scrub_oflags);
+
+#else
+
+# define xfs_repair_can_fix(mp) (false)
+# define xfs_repair_should_fix(sm) (false)
+# define xfs_repair_probe (NULL)
+
+#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
+
+#endif /* __XFS_SCRUB_REPAIR_H__ */
@@ -42,11 +42,16 @@
#include "xfs_refcount_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
+#include "xfs_errortag.h"
+#include "xfs_error.h"
+#include "xfs_log.h"
+#include "xfs_trans_priv.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/btree.h"
+#include "scrub/repair.h"
/*
* Online Scrub and Repair
@@ -120,6 +125,24 @@
* XCORRUPT flag; btree query function errors are noted by setting the
* XFAIL flag and deleting the cursor to prevent further attempts to
* cross-reference with a defective btree.
+ *
+ * If a piece of metadata proves corrupt or suboptimal, the userspace
+ * program can ask the kernel to apply some tender loving care (TLC) to
+ * the metadata object by setting the REPAIR flag and re-calling the
+ * scrub ioctl. "Corruption" is defined by metadata violating the
+ * on-disk specification; operations cannot continue if the violation is
+ * left untreated. It is possible for XFS to continue if an object is
+ * "suboptimal", however performance may be degraded. Repairs are
+ * usually performed by rebuilding the metadata entirely out of
+ * redundant metadata. Optimizing, on the other hand, can sometimes be
+ * done without rebuilding entire structures.
+ *
+ * Generally speaking, the repair code has the following code structure:
+ * Lock -> scrub -> repair -> commit -> re-lock -> re-scrub -> unlock.
+ * The first check helps us figure out if we need to rebuild or simply
+ * optimize the structure so that the rebuild knows what to do. The
+ * second check evaluates the completeness of the repair; that is what
+ * is reported to userspace.
*/
/*
@@ -155,7 +178,10 @@ xfs_scrub_teardown(
{
xfs_scrub_ag_free(sc, &sc->sa);
if (sc->tp) {
- xfs_trans_cancel(sc->tp);
+ if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
+ error = xfs_trans_commit(sc->tp);
+ else
+ xfs_trans_cancel(sc->tp);
sc->tp = NULL;
}
if (sc->ip) {
@@ -180,6 +206,7 @@ static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
.type = ST_NONE,
.setup = xfs_scrub_setup_fs,
.scrub = xfs_scrub_probe,
+ .repair = xfs_repair_probe,
},
[XFS_SCRUB_TYPE_SB] = { /* superblock */
.type = ST_PERAG,
@@ -379,9 +406,17 @@ xfs_scrub_validate_inputs(
if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
goto out;
- /* We don't know how to repair anything yet. */
- if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
- goto out;
+ /* Can we repair it? */
+ if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
+ /* Only allow repair for metadata we know how to fix. */
+ error = -EOPNOTSUPP;
+ if (!xfs_repair_can_fix(mp) || ops->repair == NULL)
+ goto out;
+
+ error = -EROFS;
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ goto out;
+ }
error = 0;
out:
@@ -396,7 +431,11 @@ xfs_scrub_metadata(
{
struct xfs_scrub_context sc;
struct xfs_mount *mp = ip->i_mount;
+ char *errstr;
bool try_harder = false;
+ bool already_fixed = false;
+ bool was_corrupt = false;
+ uint32_t scrub_oflags;
int error = 0;
BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
@@ -446,9 +485,60 @@ xfs_scrub_metadata(
} else if (error)
goto out_teardown;
+ /* Let debug users force us into the repair routines. */
+ if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && !already_fixed &&
+ XFS_TEST_ERROR(false, mp,
+ XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
+ sc.sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ }
+ if (!already_fixed)
+ was_corrupt = !!(sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
+ XFS_SCRUB_OFLAG_XCORRUPT));
+
+ if (!already_fixed && xfs_repair_should_fix(sc.sm)) {
+ xfs_scrub_ag_btcur_free(&sc.sa);
+
+ /*
+ * Repair whatever's broken. We have to clear the out
+ * flags because some of our iterator functions abort if
+ * any of the corruption flags are set.
+ */
+ trace_xfs_repair_attempt(ip, sc.sm, error);
+ scrub_oflags = sc.sm->sm_flags & XFS_SCRUB_FLAGS_OUT;
+ sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+ error = sc.ops->repair(&sc, scrub_oflags);
+ trace_xfs_repair_done(ip, sc.sm, error);
+ if (!try_harder && error == -EDEADLOCK) {
+ error = xfs_scrub_teardown(&sc, ip, 0);
+ if (error)
+ goto out;
+ try_harder = true;
+ goto retry_op;
+ } else if (error)
+ goto out_teardown;
+
+ /*
+ * Commit the fixes and perform a second dry-run scrub
+ * so that we can tell userspace if we fixed the problem.
+ */
+ error = xfs_scrub_teardown(&sc, ip, error);
+ if (error)
+ goto out;
+ already_fixed = true;
+ goto retry_op;
+ }
+
if (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
- XFS_SCRUB_OFLAG_XCORRUPT))
- xfs_alert_ratelimited(mp, "Corruption detected during scrub.");
+ XFS_SCRUB_OFLAG_XCORRUPT)) {
+ if (sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
+ errstr = "Corruption not fixed during online repair. "
+ "Unmount and run xfs_repair.";
+ else
+ errstr = "Corruption detected during scrub.";
+ xfs_alert_ratelimited(mp, errstr);
+ } else if (already_fixed && was_corrupt) {
+ xfs_alert_ratelimited(mp, "Corruption repaired during scrub.");
+ }
out_teardown:
error = xfs_scrub_teardown(&sc, ip, error);
@@ -38,6 +38,13 @@ struct xfs_scrub_meta_ops {
/* Examine metadata for errors. */
int (*scrub)(struct xfs_scrub_context *);
+ /*
+ * Repair the metadata. The outflags are cleared from the scrub
+ * context (so that the iterator functions will not abort early) and
+ * passed in as the second argument.
+ */
+ int (*repair)(struct xfs_scrub_context *, uint32_t);
+
/* Decide if we even have this piece of metadata. */
bool (*has)(struct xfs_sb *);
@@ -61,6 +61,7 @@ static unsigned int xfs_errortag_random_default[] = {
XFS_RANDOM_LOG_BAD_CRC,
XFS_RANDOM_LOG_ITEM_PIN,
XFS_RANDOM_BUF_LRU_REF,
+ XFS_RANDOM_FORCE_SCRUB_REPAIR,
};
struct xfs_errortag_attr {
@@ -167,6 +168,7 @@ XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES);
XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC);
XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN);
XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF);
+XFS_ERRORTAG_ATTR_RW(force_repair, XFS_ERRTAG_FORCE_SCRUB_REPAIR);
static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -201,6 +203,7 @@ static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
XFS_ERRORTAG_ATTR_LIST(log_item_pin),
XFS_ERRORTAG_ATTR_LIST(buf_lru_ref),
+ XFS_ERRORTAG_ATTR_LIST(force_repair),
NULL,
};