@@ -141,6 +141,16 @@ verify_inode_chunk(xfs_mount_t *mp,
_("uncertain inode block %d/%d already known\n"),
agno, agbno);
break;
+ case XR_E_METADATA:
+ /*
+ * Files in the metadata directory tree are always
+ * reconstructed, so it's ok to let go if this block
+ * is also a valid inode cluster.
+ */
+ do_warn(
+ _("inode block %d/%d claimed by metadata file\n"),
+ agno, agbno);
+ fallthrough;
case XR_E_UNKNOWN:
case XR_E_FREE1:
case XR_E_FREE:
@@ -430,6 +440,7 @@ verify_inode_chunk(xfs_mount_t *mp,
set_bmap_ext(agno, cur_agbno, blen, XR_E_MULT);
pthread_mutex_unlock(&ag_locks[agno].lock);
return 0;
+ case XR_E_METADATA:
case XR_E_INO:
do_error(
_("uncertain inode block overlap, agbno = %d, ino = %" PRIu64 "\n"),
@@ -474,6 +485,16 @@ verify_inode_chunk(xfs_mount_t *mp,
_("uncertain inode block %" PRIu64 " already known\n"),
XFS_AGB_TO_FSB(mp, agno, cur_agbno));
break;
+ case XR_E_METADATA:
+ /*
+ * Files in the metadata directory tree are always
+ * reconstructed, so it's ok to let go if this block
+ * is also a valid inode cluster.
+ */
+ do_warn(
+ _("inode block %d/%d claimed by metadata file\n"),
+ agno, agbno);
+ fallthrough;
case XR_E_UNKNOWN:
case XR_E_FREE1:
case XR_E_FREE:
@@ -559,6 +580,16 @@ process_inode_agbno_state(
switch (state) {
case XR_E_INO: /* already marked */
break;
+ case XR_E_METADATA:
+ /*
+ * Files in the metadata directory tree are always
+ * reconstructed, so it's ok to let go if this block is also a
+ * valid inode cluster.
+ */
+ do_warn(
+ _("inode block %d/%d claimed by metadata file\n"),
+ agno, agbno);
+ fallthrough;
case XR_E_UNKNOWN:
case XR_E_FREE:
case XR_E_FREE1:
@@ -220,6 +220,7 @@ static int
process_rt_rec_state(
struct xfs_mount *mp,
xfs_ino_t ino,
+ bool zap_metadata,
struct xfs_bmbt_irec *irec)
{
xfs_fsblock_t b = irec->br_startblock;
@@ -256,7 +257,13 @@ _("data fork in rt inode %" PRIu64 " found invalid rt extent %"PRIu64" state %d
switch (state) {
case XR_E_FREE:
case XR_E_UNKNOWN:
- set_rtbmap(ext, XR_E_INUSE);
+ set_rtbmap(ext, zap_metadata ? XR_E_METADATA :
+ XR_E_INUSE);
+ break;
+ case XR_E_METADATA:
+ do_error(
+_("data fork in rt inode %" PRIu64 " found metadata file block %" PRIu64 " in rt bmap\n"),
+ ino, ext);
break;
case XR_E_BAD_STATE:
do_error(
@@ -293,7 +300,8 @@ process_rt_rec(
struct xfs_bmbt_irec *irec,
xfs_ino_t ino,
xfs_rfsblock_t *tot,
- int check_dups)
+ int check_dups,
+ bool zap_metadata)
{
xfs_fsblock_t lastb;
int bad;
@@ -333,7 +341,7 @@ _("inode %" PRIu64 " - bad rt extent overflows - start %" PRIu64 ", "
if (check_dups)
bad = process_rt_rec_dups(mp, ino, irec);
else
- bad = process_rt_rec_state(mp, ino, irec);
+ bad = process_rt_rec_state(mp, ino, zap_metadata, irec);
if (bad)
return bad;
@@ -364,7 +372,8 @@ process_bmbt_reclist_int(
xfs_fileoff_t *first_key,
xfs_fileoff_t *last_key,
int check_dups,
- int whichfork)
+ int whichfork,
+ bool zap_metadata)
{
xfs_bmbt_irec_t irec;
xfs_filblks_t cp = 0; /* prev count */
@@ -443,7 +452,8 @@ _("zero length extent (off = %" PRIu64 ", fsbno = %" PRIu64 ") in ino %" PRIu64
if (type == XR_INO_RTDATA && whichfork == XFS_DATA_FORK) {
pthread_mutex_lock(&rt_lock.lock);
- error2 = process_rt_rec(mp, &irec, ino, tot, check_dups);
+ error2 = process_rt_rec(mp, &irec, ino, tot, check_dups,
+ zap_metadata);
pthread_mutex_unlock(&rt_lock.lock);
if (error2)
return error2;
@@ -558,6 +568,11 @@ _("%s fork in ino %" PRIu64 " claims free block %" PRIu64 "\n"),
case XR_E_INUSE_FS1:
do_warn(_("rmap claims metadata use!\n"));
fallthrough;
+ case XR_E_METADATA:
+ do_warn(
+_("%s fork in inode %" PRIu64 " claims metadata file block %" PRIu64 "\n"),
+ forkname, ino, b);
+ break;
case XR_E_FS_MAP:
case XR_E_INO:
case XR_E_INUSE_FS:
@@ -614,15 +629,28 @@ _("illegal state %d in block map %" PRIu64 "\n"),
for (; agbno < ebno; agbno += blen) {
state = get_bmap_ext(agno, agbno, ebno, &blen);
switch (state) {
+ case XR_E_METADATA:
+ /*
+ * The entire metadata directory tree is rebuilt
+ * every time, so we can let regular files take
+ * ownership of this block.
+ */
+ if (zap_metadata)
+ break;
+ fallthrough;
case XR_E_FREE:
case XR_E_FREE1:
case XR_E_INUSE1:
case XR_E_UNKNOWN:
- set_bmap_ext(agno, agbno, blen, XR_E_INUSE);
+ set_bmap_ext(agno, agbno, blen, zap_metadata ?
+ XR_E_METADATA : XR_E_INUSE);
break;
+
case XR_E_INUSE:
case XR_E_MULT:
- set_bmap_ext(agno, agbno, blen, XR_E_MULT);
+ if (!zap_metadata)
+ set_bmap_ext(agno, agbno, blen,
+ XR_E_MULT);
break;
default:
break;
@@ -661,10 +689,12 @@ process_bmbt_reclist(
blkmap_t **blkmapp,
xfs_fileoff_t *first_key,
xfs_fileoff_t *last_key,
- int whichfork)
+ int whichfork,
+ bool zap_metadata)
{
return process_bmbt_reclist_int(mp, rp, numrecs, type, ino, tot,
- blkmapp, first_key, last_key, 0, whichfork);
+ blkmapp, first_key, last_key, 0, whichfork,
+ zap_metadata);
}
/*
@@ -679,13 +709,15 @@ scan_bmbt_reclist(
int type,
xfs_ino_t ino,
xfs_rfsblock_t *tot,
- int whichfork)
+ int whichfork,
+ bool zap_metadata)
{
xfs_fileoff_t first_key = 0;
xfs_fileoff_t last_key = 0;
return process_bmbt_reclist_int(mp, rp, numrecs, type, ino, tot,
- NULL, &first_key, &last_key, 1, whichfork);
+ NULL, &first_key, &last_key, 1, whichfork,
+ zap_metadata);
}
/*
@@ -760,7 +792,8 @@ process_btinode(
xfs_extnum_t *nex,
blkmap_t **blkmapp,
int whichfork,
- int check_dups)
+ int check_dups,
+ bool zap_metadata)
{
xfs_bmdr_block_t *dib;
xfs_fileoff_t last_key;
@@ -839,8 +872,8 @@ _("bad bmap btree ptr 0x%" PRIx64 " in ino %" PRIu64 "\n"),
if (scan_lbtree(get_unaligned_be64(&pp[i]), level, scan_bmapbt,
type, whichfork, lino, tot, nex, blkmapp,
- &cursor, 1, check_dups, magic, NULL,
- &xfs_bmbt_buf_ops))
+ &cursor, 1, check_dups, magic,
+ (void *)zap_metadata, &xfs_bmbt_buf_ops))
return(1);
/*
* fix key (offset) mismatches between the keys in root
@@ -935,7 +968,8 @@ process_exinode(
xfs_extnum_t *nex,
blkmap_t **blkmapp,
int whichfork,
- int check_dups)
+ int check_dups,
+ bool zap_metadata)
{
xfs_ino_t lino;
xfs_bmbt_rec_t *rp;
@@ -969,10 +1003,10 @@ process_exinode(
if (check_dups == 0)
ret = process_bmbt_reclist(mp, rp, &numrecs, type, lino,
tot, blkmapp, &first_key, &last_key,
- whichfork);
+ whichfork, zap_metadata);
else
ret = scan_bmbt_reclist(mp, rp, &numrecs, type, lino, tot,
- whichfork);
+ whichfork, zap_metadata);
*nex = numrecs;
return ret;
@@ -1901,7 +1935,8 @@ process_inode_data_fork(
xfs_extnum_t *nextents,
blkmap_t **dblkmap,
int check_dups,
- struct xfs_buf **ino_bpp)
+ struct xfs_buf **ino_bpp,
+ bool zap_metadata)
{
struct xfs_dinode *dino = *dinop;
xfs_ino_t lino = XFS_AGINO_TO_INO(mp, agno, ino);
@@ -1948,14 +1983,14 @@ process_inode_data_fork(
try_rebuild = 1;
err = process_exinode(mp, agno, ino, dino, type, dirty,
totblocks, nextents, dblkmap, XFS_DATA_FORK,
- check_dups);
+ check_dups, zap_metadata);
break;
case XFS_DINODE_FMT_BTREE:
if (!rmapbt_suspect && try_rebuild == -1)
try_rebuild = 1;
err = process_btinode(mp, agno, ino, dino, type, dirty,
totblocks, nextents, dblkmap, XFS_DATA_FORK,
- check_dups);
+ check_dups, zap_metadata);
break;
case XFS_DINODE_FMT_DEV:
err = 0;
@@ -2008,12 +2043,12 @@ _("would have tried to rebuild inode %"PRIu64" data fork\n"),
case XFS_DINODE_FMT_EXTENTS:
err = process_exinode(mp, agno, ino, dino, type,
dirty, totblocks, nextents, dblkmap,
- XFS_DATA_FORK, 0);
+ XFS_DATA_FORK, 0, zap_metadata);
break;
case XFS_DINODE_FMT_BTREE:
err = process_btinode(mp, agno, ino, dino, type,
dirty, totblocks, nextents, dblkmap,
- XFS_DATA_FORK, 0);
+ XFS_DATA_FORK, 0, zap_metadata);
break;
case XFS_DINODE_FMT_DEV:
err = 0;
@@ -2048,7 +2083,8 @@ process_inode_attr_fork(
int check_dups,
int extra_attr_check,
int *retval,
- struct xfs_buf **ino_bpp)
+ struct xfs_buf **ino_bpp,
+ bool zap_metadata)
{
xfs_ino_t lino = XFS_AGINO_TO_INO(mp, agno, ino);
struct xfs_dinode *dino = *dinop;
@@ -2096,7 +2132,7 @@ process_inode_attr_fork(
*anextents = 0;
err = process_exinode(mp, agno, ino, dino, type, dirty,
atotblocks, anextents, &ablkmap,
- XFS_ATTR_FORK, check_dups);
+ XFS_ATTR_FORK, check_dups, zap_metadata);
break;
case XFS_DINODE_FMT_BTREE:
if (!rmapbt_suspect && try_rebuild == -1)
@@ -2105,7 +2141,7 @@ process_inode_attr_fork(
*anextents = 0;
err = process_btinode(mp, agno, ino, dino, type, dirty,
atotblocks, anextents, &ablkmap,
- XFS_ATTR_FORK, check_dups);
+ XFS_ATTR_FORK, check_dups, zap_metadata);
break;
default:
do_warn(_("illegal attribute format %d, ino %" PRIu64 "\n"),
@@ -2169,12 +2205,12 @@ _("would have tried to rebuild inode %"PRIu64" attr fork or cleared it\n"),
case XFS_DINODE_FMT_EXTENTS:
err = process_exinode(mp, agno, ino, dino,
type, dirty, atotblocks, anextents,
- &ablkmap, XFS_ATTR_FORK, 0);
+ &ablkmap, XFS_ATTR_FORK, 0, zap_metadata);
break;
case XFS_DINODE_FMT_BTREE:
err = process_btinode(mp, agno, ino, dino,
type, dirty, atotblocks, anextents,
- &ablkmap, XFS_ATTR_FORK, 0);
+ &ablkmap, XFS_ATTR_FORK, 0, zap_metadata);
break;
default:
do_error(_("illegal attribute fmt %d, ino %" PRIu64 "\n"),
@@ -2393,6 +2429,7 @@ process_dinode_int(
xfs_agino_t unlinked_ino;
struct xfs_perag *pag;
bool is_meta = false;
+ bool zap_metadata = false;
*dirty = *isa_dir = 0;
*used = is_used;
@@ -2982,6 +3019,33 @@ _("Bad CoW extent size %u on inode %" PRIu64 ", "),
off = get_inode_offset(mp, lino, irec);
set_inode_is_meta(irec, off);
is_meta = true;
+
+ /*
+ * We always rebuild the metadata directory tree during phase
+ * 6, so we use this flag to get all the directory blocks
+ * marked as free, and any other metadata files whose contents
+ * we don't want to save.
+ *
+ * Currently, there are no metadata files that use xattrs, so
+ * we always drop the xattr blocks of metadata files. Parent
+ * pointers will be rebuilt during phase 6.
+ */
+ switch (type) {
+ case XR_INO_RTBITMAP:
+ case XR_INO_RTSUM:
+ case XR_INO_UQUOTA:
+ case XR_INO_GQUOTA:
+ case XR_INO_PQUOTA:
+ /*
+ * This inode was recognized as being filesystem
+ * metadata, so preserve the inode and its contents for
+ * later checking and repair.
+ */
+ break;
+ default:
+ zap_metadata = true;
+ break;
+ }
}
/*
@@ -2989,7 +3053,7 @@ _("Bad CoW extent size %u on inode %" PRIu64 ", "),
*/
if (process_inode_data_fork(mp, agno, ino, dinop, type, dirty,
&totblocks, &nextents, &dblkmap, check_dups,
- ino_bpp) != 0)
+ ino_bpp, zap_metadata) != 0)
goto bad_out;
dino = *dinop;
@@ -2999,7 +3063,7 @@ _("Bad CoW extent size %u on inode %" PRIu64 ", "),
*/
if (process_inode_attr_fork(mp, agno, ino, dinop, type, dirty,
&atotblocks, &anextents, check_dups, extra_attr_check,
- &retval, ino_bpp))
+ &retval, ino_bpp, is_meta))
goto bad_out;
dino = *dinop;
@@ -27,7 +27,8 @@ process_bmbt_reclist(xfs_mount_t *mp,
struct blkmap **blkmapp,
uint64_t *first_key,
uint64_t *last_key,
- int whichfork);
+ int whichfork,
+ bool zap_metadata);
int
scan_bmbt_reclist(
@@ -37,7 +38,8 @@ scan_bmbt_reclist(
int type,
xfs_ino_t ino,
xfs_rfsblock_t *tot,
- int whichfork);
+ int whichfork,
+ bool zap_metadata);
void
update_rootino(xfs_mount_t *mp);
@@ -85,18 +85,25 @@ typedef struct rt_extent_tree_node {
#define XR_E_UNKNOWN 0 /* unknown state */
#define XR_E_FREE1 1 /* free block (marked by one fs space tree) */
#define XR_E_FREE 2 /* free block (marked by both fs space trees) */
-#define XR_E_INUSE 3 /* extent used by file/dir data or metadata */
-#define XR_E_INUSE_FS 4 /* extent used by fs ag header or log */
-#define XR_E_MULT 5 /* extent is multiply referenced */
-#define XR_E_INO 6 /* extent used by inodes (inode blocks) */
-#define XR_E_FS_MAP 7 /* extent used by fs space/inode maps */
-#define XR_E_INUSE1 8 /* used block (marked by rmap btree) */
-#define XR_E_INUSE_FS1 9 /* used by fs ag header or log (rmap btree) */
-#define XR_E_INO1 10 /* used by inodes (marked by rmap btree) */
-#define XR_E_FS_MAP1 11 /* used by fs space/inode maps (rmap btree) */
-#define XR_E_REFC 12 /* used by fs ag reference count btree */
-#define XR_E_COW 13 /* leftover cow extent */
-#define XR_E_BAD_STATE 14
+/*
+ * Space used by metadata files. The entire metadata directory tree will be
+ * rebuilt from scratch during phase 6, so this value must be less than
+ * XR_E_INUSE so that the space will go back to the free space btrees during
+ * phase 5.
+ */
+#define XR_E_METADATA 3
+#define XR_E_INUSE 4 /* extent used by file/dir data or metadata */
+#define XR_E_INUSE_FS 5 /* extent used by fs ag header or log */
+#define XR_E_MULT 6 /* extent is multiply referenced */
+#define XR_E_INO 7 /* extent used by inodes (inode blocks) */
+#define XR_E_FS_MAP 8 /* extent used by fs space/inode maps */
+#define XR_E_INUSE1 9 /* used block (marked by rmap btree) */
+#define XR_E_INUSE_FS1 10 /* used by fs ag header or log (rmap btree) */
+#define XR_E_INO1 11 /* used by inodes (marked by rmap btree) */
+#define XR_E_FS_MAP1 12 /* used by fs space/inode maps (rmap btree) */
+#define XR_E_REFC 13 /* used by fs ag reference count btree */
+#define XR_E_COW 14 /* leftover cow extent */
+#define XR_E_BAD_STATE 15
/* separate state bit, OR'ed into high (4th) bit of ex_state field */
@@ -303,6 +303,7 @@ phase4(xfs_mount_t *mp)
_("unknown block state, ag %d, blocks %u-%u\n"),
i, j, j + blen - 1);
fallthrough;
+ case XR_E_METADATA:
case XR_E_UNKNOWN:
case XR_E_FREE:
case XR_E_INUSE:
@@ -335,6 +336,7 @@ phase4(xfs_mount_t *mp)
_("unknown rt extent state, extent %" PRIu64 "\n"),
rtx);
fallthrough;
+ case XR_E_METADATA:
case XR_E_UNKNOWN:
case XR_E_FREE1:
case XR_E_FREE:
@@ -227,6 +227,7 @@ scan_bmapbt(
xfs_agnumber_t agno;
xfs_agblock_t agbno;
int state;
+ bool zap_metadata = priv != NULL;
/*
* unlike the ag freeblock btrees, if anything looks wrong
@@ -352,7 +353,20 @@ _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
case XR_E_UNKNOWN:
case XR_E_FREE1:
case XR_E_FREE:
- set_bmap(agno, agbno, XR_E_INUSE);
+ set_bmap(agno, agbno, zap_metadata ? XR_E_METADATA :
+ XR_E_INUSE);
+ break;
+ case XR_E_METADATA:
+ /*
+ * bmbt block already claimed by a metadata file. We
+ * always reconstruct the entire metadata tree, so if
+ * this is a regular file we mark it owned by the file.
+ */
+ do_warn(
+_("inode 0x%" PRIx64 "bmap block 0x%" PRIx64 " claimed by metadata file\n"),
+ ino, bno);
+ if (!zap_metadata)
+ set_bmap(agno, agbno, XR_E_INUSE);
break;
case XR_E_FS_MAP:
case XR_E_INUSE:
@@ -364,7 +378,8 @@ _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
* we made it here, the block probably
* contains btree data.
*/
- set_bmap(agno, agbno, XR_E_MULT);
+ if (!zap_metadata)
+ set_bmap(agno, agbno, XR_E_MULT);
do_warn(
_("inode 0x%" PRIx64 "bmap block 0x%" PRIx64 " claimed, state is %d\n"),
ino, bno, state);
@@ -429,7 +444,8 @@ _("inode %" PRIu64 " bad # of bmap records (%" PRIu64 ", min - %u, max - %u)\n")
if (check_dups == 0) {
err = process_bmbt_reclist(mp, rp, &numrecs, type, ino,
tot, blkmapp, &first_key,
- &last_key, whichfork);
+ &last_key, whichfork,
+ zap_metadata);
if (err)
return 1;
@@ -459,7 +475,7 @@ _("out-of-order bmap key (file offset) in inode %" PRIu64 ", %s fork, fsbno %" P
return 0;
} else {
return scan_bmbt_reclist(mp, rp, &numrecs, type, ino,
- tot, whichfork);
+ tot, whichfork, zap_metadata);
}
}
if (numrecs > mp->m_bmap_dmxr[1] || (isroot == 0 && numrecs <
@@ -849,6 +865,12 @@ process_rmap_rec(
break;
}
break;
+ case XR_E_METADATA:
+ do_warn(
+_("Metadata file block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+ agno, b, b + blen - 1,
+ name, state, owner);
+ break;
case XR_E_INUSE_FS:
if (owner == XFS_RMAP_OWN_FS ||
owner == XFS_RMAP_OWN_LOG)