@@ -69,6 +69,53 @@ defer_fs_repair(
return 0;
}
+/*
+ * In repair mode, alternate spot repairs of the scrub item with rechecks until
+ * nothing still needs checking or a pass makes no progress (*defer is set).
+ */
+static int
+repair_and_scrub_loop(
+ struct scrub_ctx *ctx,
+ struct scrub_item *sri,
+ const char *descr,
+ bool *defer)
+{
+ unsigned int to_check;
+ int ret;
+
+ *defer = false;
+ if (ctx->mode != SCRUB_MODE_REPAIR)
+ return 0;
+
+ to_check = scrub_item_count_needscheck(sri);
+ while (to_check > 0) {
+ unsigned int nr;
+
+ ret = repair_item_corruption(ctx, sri);
+ if (ret)
+ return ret;
+
+ ret = scrub_item_check(ctx, sri);
+ if (ret)
+ return ret;
+
+ nr = scrub_item_count_needscheck(sri);
+ if (nr == to_check) {
+ /*
+ * The recheck left as many items unchecked as before,
+ * so stop looping and defer the rest until phase 4.
+ */
+ str_info(ctx, descr,
+ _("Unable to make forward checking progress; will try again in phase 4."));
+ *defer = true;
+ return 0;
+ }
+ to_check = nr;
+ }
+
+ return 0;
+}
+
/* Scrub each AG's metadata btrees. */
static void
scan_ag_metadata(
@@ -82,6 +129,7 @@ scan_ag_metadata(
struct scan_ctl *sctl = arg;
char descr[DESCR_BUFSZ];
unsigned int difficulty;
+ bool defer_repairs;
int ret;
if (sctl->aborted)
@@ -97,10 +145,22 @@ scan_ag_metadata(
scrub_item_schedule_group(&sri, XFROG_SCRUB_GROUP_AGHEADER);
scrub_item_schedule_group(&sri, XFROG_SCRUB_GROUP_PERAG);
+ /*
+ * Try to check all of the AG metadata items that we just scheduled.
+ * If we return with some types still needing a check, try repairing
+ * any damaged metadata that we've found so far, and try again. Defer
+ * the rest to phase 4 if we stop making forward progress.
+ */
ret = scrub_item_check(ctx, &sri);
if (ret)
goto err;
+ ret = repair_and_scrub_loop(ctx, &sri, descr, &defer_repairs);
+ if (ret)
+ goto err;
+ if (defer_repairs)
+ goto defer;
+
/*
* Figure out if we need to perform early fixing. The only
* reason we need to do this is if the inobt is broken, which
@@ -117,6 +177,7 @@ scan_ag_metadata(
if (ret)
goto err;
+defer:
/* Everything else gets fixed during phase 4. */
ret = defer_fs_repair(ctx, &sri);
if (ret)
@@ -137,11 +198,18 @@ scan_fs_metadata(
struct scrub_ctx *ctx = (struct scrub_ctx *)wq->wq_ctx;
struct scan_ctl *sctl = arg;
unsigned int difficulty;
+ bool defer_repairs;
int ret;
if (sctl->aborted)
goto out;
+ /*
+ * Schedule the requested metadata type and try to check it. If we
+ * return with anything still needing a check, try repairing any
+ * damaged metadata that we've found so far, and try again. Defer the
+ * rest to phase 4 if we stop making forward progress.
+ */
scrub_item_init_fs(&sri);
scrub_item_schedule(&sri, type);
ret = scrub_item_check(ctx, &sri);
@@ -150,10 +218,20 @@ scan_fs_metadata(
goto out;
}
+ ret = repair_and_scrub_loop(ctx, &sri, xfrog_scrubbers[type].descr,
+ &defer_repairs);
+ if (ret) {
+ sctl->aborted = true;
+ goto out;
+ }
+ if (defer_repairs)
+ goto defer;
+
/* Complain about metadata corruptions that might not be fixable. */
difficulty = repair_item_difficulty(&sri);
warn_repair_difficulties(ctx, difficulty, xfrog_scrubbers[type].descr);
+defer:
ret = defer_fs_repair(ctx, &sri);
if (ret) {
sctl->aborted = true;
@@ -99,6 +99,58 @@ try_inode_repair(
return repair_file_corruption(ictx->ctx, sri, fd);
}
+/*
+ * In repair mode, alternate spot repairs of the file behind fd with rechecks
+ * until nothing still needs checking or a pass makes no progress (*defer set).
+ */
+static int
+repair_and_scrub_inode_loop(
+ struct scrub_ctx *ctx,
+ struct xfs_bulkstat *bstat,
+ int fd,
+ struct scrub_item *sri,
+ bool *defer)
+{
+ unsigned int to_check;
+ int error;
+
+ *defer = false;
+ if (ctx->mode != SCRUB_MODE_REPAIR)
+ return 0;
+
+ to_check = scrub_item_count_needscheck(sri);
+ while (to_check > 0) {
+ unsigned int nr;
+
+ error = repair_file_corruption(ctx, sri, fd);
+ if (error)
+ return error;
+
+ error = scrub_item_check_file(ctx, sri, fd);
+ if (error)
+ return error;
+
+ nr = scrub_item_count_needscheck(sri);
+ if (nr == to_check) {
+ char descr[DESCR_BUFSZ];
+
+ /*
+ * The recheck left as many items unchecked as before,
+ * so stop looping and defer this inode until phase 4.
+ */
+ scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ,
+ bstat->bs_ino, bstat->bs_gen, NULL);
+ str_info(ctx, descr,
+ _("Unable to make forward checking progress; will try again in phase 4."));
+ *defer = true;
+ return 0;
+ }
+ to_check = nr;
+ }
+
+ return 0;
+}
+
/* Verify the contents, xattrs, and extent maps of an inode. */
static int
scrub_inode(
@@ -169,11 +221,28 @@ scrub_inode(
scrub_item_schedule(&sri, XFS_SCRUB_TYPE_XATTR);
scrub_item_schedule(&sri, XFS_SCRUB_TYPE_PARENT);
- /* Try to check and repair the file while it's open. */
+ /*
+ * Try to check all of the metadata items that we just scheduled. If
+ * we return with some types still needing a check and the space
+ * metadata isn't also in need of repairs, try repairing any damaged
+ * file metadata that we've found so far, and try checking the file
+ * again. Worst case, defer the repairs and the checks to phase 4 if
+ * we can't make any progress on anything.
+ */
error = scrub_item_check_file(ctx, &sri, fd);
if (error)
goto out;
+ if (!ictx->always_defer_repairs) {
+ bool defer_repairs;
+
+ error = repair_and_scrub_inode_loop(ctx, bstat, fd, &sri,
+ &defer_repairs);
+ if (error || defer_repairs)
+ goto out;
+ }
+
+ /* Try to repair the file while it's open. */
error = try_inode_repair(ictx, &sri, fd);
if (error)
goto out;
@@ -860,6 +860,7 @@ repair_item_to_action_item(
struct action_item **aitemp)
{
struct action_item *aitem;
+ unsigned int scrub_type;
if (repair_item_count_needsrepair(sri) == 0)
return 0;
@@ -875,6 +876,20 @@ repair_item_to_action_item(
INIT_LIST_HEAD(&aitem->list);
memcpy(&aitem->sri, sri, sizeof(struct scrub_item));
+ /*
+ * If the scrub item indicates that there is unchecked metadata, assume
+ * that the scrub type checker depends on something that couldn't be
+ * fixed. Mark that type as corrupt so that phase 4 will try it again.
+ */
+ foreach_scrub_type(scrub_type) {
+ __u8 *state = aitem->sri.sri_state;
+
+ if (state[scrub_type] & SCRUB_ITEM_NEEDSCHECK) {
+ state[scrub_type] &= ~SCRUB_ITEM_NEEDSCHECK;
+ state[scrub_type] |= SCRUB_ITEM_CORRUPT;
+ }
+ }
+
*aitemp = aitem;
return 0;
}