diff mbox series

[5/5] xfs: use atomic extent swapping to repair directories

Message ID 158812840567.169849.10189780953079405106.stgit@magnolia (mailing list archive)
State New, archived
Headers show
Series xfs: atomic file metadata repairs | expand

Commit Message

Darrick J. Wong April 29, 2020, 2:46 a.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

When repairing a directory online, stage the new directory contents in a
temporary file and use the atomic extent swapping mechanism to commit
the results in bulk.  As a side effect of this patch, directory
inactivation will be able to purge any leftover dir blocks.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_swapext.c |   34 ++++
 fs/xfs/scrub/dir.c          |   12 +
 fs/xfs/scrub/dir_repair.c   |  415 +++++++++++++++++++++++++++++++++++++++++--
 fs/xfs/xfs_inode.c          |   49 +++++
 4 files changed, 488 insertions(+), 22 deletions(-)
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_swapext.c b/fs/xfs/libxfs/xfs_swapext.c
index f16f7d9a0b66..afdc516428bd 100644
--- a/fs/xfs/libxfs/xfs_swapext.c
+++ b/fs/xfs/libxfs/xfs_swapext.c
@@ -23,6 +23,7 @@ 
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
 #include "xfs_attr_leaf.h"
+#include "xfs_dir2_priv.h"
 
 /* Information to help us reset reflink flag / CoW fork state after a swap. */
 
@@ -233,6 +234,37 @@  xfs_swapext_attr_to_shortform2(
 	return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
 }
 
+/* Convert inode2's block dir fork back to shortform, if possible.. */
+STATIC int
+xfs_swapext_dir_to_shortform2(
+	struct xfs_trans		*tp,
+	struct xfs_swapext_intent	*sxi)
+{
+	struct xfs_da_args	args = {
+		.dp		= sxi->si_ip2,
+		.geo		= tp->t_mountp->m_dir_geo,
+		.whichfork	= XFS_DATA_FORK,
+		.trans		= tp,
+	};
+	struct xfs_dir2_sf_hdr	sfh;
+	struct xfs_buf		*bp;
+	int			size;
+	int			error;
+
+	if (!xfs_bmap_one_block(sxi->si_ip2, XFS_DATA_FORK))
+		return 0;
+
+	error = xfs_dir3_block_read(tp, sxi->si_ip2, &bp);
+	if (error)
+		return error;
+
+	size = xfs_dir2_block_sfsize(sxi->si_ip2, bp->b_addr, &sfh);
+	if (size > XFS_IFORK_DSIZE(sxi->si_ip2))
+		return 0;
+
+	return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
+}
+
 #define XFS_SWAP_EXTENT_POST_PROCESSING (XFS_SWAP_EXTENT_TO_SHORTFORM2)
 
 /* Do we have more work to do to finish this operation? */
@@ -265,6 +297,8 @@  xfs_swapext_finish_one(
 		if (sxi->si_flags & XFS_SWAP_EXTENT_TO_SHORTFORM2) {
 			if (sxi->si_flags & XFS_SWAP_EXTENT_ATTR_FORK)
 				error = xfs_swapext_attr_to_shortform2(tp, sxi);
+			else if (S_ISDIR(VFS_I(sxi->si_ip2)->i_mode))
+				error = xfs_swapext_dir_to_shortform2(tp, sxi);
 			sxi->si_flags &= ~XFS_SWAP_EXTENT_TO_SHORTFORM2;
 			return error;
 		}
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index e318dd46cb15..948b7440e591 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -18,6 +18,7 @@ 
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/dabtree.h"
+#include "scrub/repair.h"
 
 /* Set us up to scrub directories. */
 int
@@ -28,6 +29,17 @@  xchk_setup_directory(
 	unsigned int		sz;
 	int			error;
 
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
+		if (!xfs_sb_version_hasatomicswap(&sc->mp->m_sb))
+			return -EOPNOTSUPP;
+
+		error = xrep_create_tempfile(sc, S_IFDIR);
+		if (error)
+			return error;
+	}
+#endif
+
 	if (sc->flags & XCHK_TRY_HARDER) {
 		error = xchk_fs_freeze(sc);
 		if (error)
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index b299f8b35ce4..3004505c55a9 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -25,6 +25,8 @@ 
 #include "xfs_bmap_btree.h"
 #include "xfs_trans_space.h"
 #include "xfs_iwalk.h"
+#include "xfs_swapext.h"
+#include "xfs_bmap_util.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
@@ -78,6 +80,9 @@  struct xrep_dir {
 	 * found a good candidate.
 	 */
 	xfs_ino_t		parent_ino;
+
+	/* nlink value of the corrected directory. */
+	xfs_nlink_t		new_nlink;
 };
 
 /*
@@ -523,7 +528,6 @@  xrep_dir_reset_fork(
 	dp->i_d.di_size = 0;
 
 	/* Reinitialize the short form directory. */
-	set_nlink(VFS_I(dp), 2);
 	args->geo = sc->mp->m_dir_geo;
 	args->dp = dp;
 	args->trans = sc->tp;
@@ -610,10 +614,10 @@  xrep_dir_insert_rec(
 	if (error)
 		return error;
 
-	trace_xrep_dir_insert_rec(rd->sc->ip, namebuf, key->namelen, key->ino,
-			key->ftype);
+	trace_xrep_dir_insert_rec(rd->sc->tempip, namebuf, key->namelen,
+			key->ino, key->ftype);
 
-	error = xfs_qm_dqattach(rd->sc->ip);
+	error = xfs_qm_dqattach(rd->sc->tempip);
 	if (error)
 		return error;
 
@@ -626,18 +630,19 @@  xrep_dir_insert_rec(
 	if (error)
 		return error;
 
-	xfs_ilock(rd->sc->ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, rd->sc->ip, XFS_ILOCK_EXCL);
+	xfs_ilock(rd->sc->tempip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, rd->sc->tempip, XFS_ILOCK_EXCL);
 
 	name.len = key->namelen;
 	name.type = key->ftype;
-	error = xfs_dir_createname(tp, rd->sc->ip, &name, key->ino, resblks);
+	error = xfs_dir_createname(tp, rd->sc->tempip, &name, key->ino,
+			resblks);
 	if (error)
 		goto err;
 
 	if (name.type == XFS_DIR3_FT_DIR)
-		inc_nlink(VFS_I(rd->sc->ip));
-	xfs_trans_log_inode(tp, rd->sc->ip, XFS_ILOG_CORE);
+		rd->new_nlink++;
+	xfs_trans_log_inode(tp, rd->sc->tempip, XFS_ILOG_CORE);
 	return xfs_trans_commit(tp);
 
 err:
@@ -645,6 +650,356 @@  xrep_dir_insert_rec(
 	return error;
 }
 
+/*
+ * Prepare both inodes' directory forks for extent swapping.  Promote the
+ * tempfile from short format to leaf format, and if the file being repaired
+ * has a short format attr fork, turn it into an empty extent list.
+ */
+STATIC int
+xrep_dir_swap_prep(
+	struct xfs_scrub	*sc,
+	bool			temp_local,
+	bool			ip_local)
+{
+	int			error;
+
+	/*
+	 * If the tempfile's attributes are in shortform format, convert that
+	 * to a single leaf extent so that we can use the atomic extent swap.
+	 */
+	if (temp_local) {
+		struct xfs_da_args	args = {
+			.dp		= sc->tempip,
+			.geo		= sc->mp->m_dir_geo,
+			.whichfork	= XFS_DATA_FORK,
+			.trans		= sc->tp,
+			.total		= 1,
+		};
+
+		error = xfs_dir2_sf_to_block(&args);
+		if (error)
+			return error;
+
+		error = xfs_defer_finish(&sc->tp);
+		if (error)
+			return error;
+	}
+
+	/*
+	 * If the file being repaired had a shortform attribute fork, convert
+	 * that to an empty extent list in preparation for the atomic extent
+	 * swap.
+	 */
+	if (ip_local) {
+		struct xfs_ifork	*ifp;
+
+		sc->ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
+		sc->ip->i_d.di_nextents = 0;
+
+		ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK);
+		xfs_ifork_reset(ifp);
+		ifp->if_bytes = 0;
+		ifp->if_u1.if_root = NULL;
+		ifp->if_height = 0;
+		ifp->if_flags |= XFS_IFEXTENTS;
+
+		xfs_trans_log_inode(sc->tp, sc->ip,
+				XFS_ILOG_CORE | XFS_ILOG_DDATA);
+	}
+
+	return 0;
+}
+
+/*
+ * Set the owner for this directory block to the directory being repaired.
+ * Return the magic number that we found, or the usual negative error.
+ */
+STATIC int
+xrep_dir_reset_owner(
+	struct xfs_scrub		*sc,
+	xfs_dablk_t			dabno,
+	struct xfs_buf			*bp,
+	unsigned int			*magic)
+{
+	struct xfs_da_geometry		*geo = sc->mp->m_dir_geo;
+	struct xfs_dir3_data_hdr	*data3 = bp->b_addr;
+	struct xfs_da3_blkinfo		*info3 = bp->b_addr;
+	struct xfs_dir3_free_hdr	*free3 = bp->b_addr;
+	struct xfs_dir2_data_entry	*dep;
+
+	/* Directory data blocks. */
+	if (dabno < geo->leafblk) {
+		*magic = be32_to_cpu(data3->hdr.magic);
+		if (*magic != XFS_DIR3_BLOCK_MAGIC &&
+		    *magic != XFS_DIR3_DATA_MAGIC)
+			return -EFSCORRUPTED;
+
+		/*
+		 * If this is a block format directory, it's possible that the
+		 * block was created as part of converting the temp directory
+		 * from short format to block format in order to use the atomic
+		 * extent swap.  In that case, the '.' entry will be set to
+		 * the temp dir, so find the dot entry and reset it.
+		 */
+		if (*magic == XFS_DIR3_BLOCK_MAGIC) {
+			dep = bp->b_addr + geo->data_entry_offset;
+			if (dep->namelen != 1 || dep->name[0] != '.')
+				return -EFSCORRUPTED;
+
+			dep->inumber = cpu_to_be64(sc->ip->i_ino);
+		}
+
+		data3->hdr.owner = be64_to_cpu(sc->ip->i_ino);
+		return 0;
+	}
+
+	/* Directory leaf and da node blocks. */
+	if (dabno < geo->freeblk) {
+		*magic = be16_to_cpu(info3->hdr.magic);
+		switch (*magic) {
+		case XFS_DA3_NODE_MAGIC:
+		case XFS_DIR3_LEAF1_MAGIC:
+		case XFS_DIR3_LEAFN_MAGIC:
+			break;
+		default:
+			return -EFSCORRUPTED;
+		}
+
+		info3->owner = be64_to_cpu(sc->ip->i_ino);
+		return 0;
+	}
+
+	/* Directory free blocks. */
+	*magic = be32_to_cpu(free3->hdr.magic);
+	if (*magic != XFS_DIR3_FREE_MAGIC)
+		return -EFSCORRUPTED;
+
+	free3->hdr.owner = be64_to_cpu(sc->ip->i_ino);
+	return 0;
+}
+
+/*
+ * If the buffer didn't have buffer ops set, we need to set them now that we've
+ * dirtied the directory block.
+ */
+STATIC void
+xrep_dir_set_verifier(
+	unsigned int		magic,
+	struct xfs_buf		*bp)
+{
+	switch (magic) {
+	case XFS_DIR3_BLOCK_MAGIC:
+		bp->b_ops = &xfs_dir3_block_buf_ops;
+		break;
+	case XFS_DIR3_DATA_MAGIC:
+		bp->b_ops = &xfs_dir3_data_buf_ops;
+		break;
+	case XFS_DA3_NODE_MAGIC:
+		bp->b_ops = &xfs_da3_node_buf_ops;
+		break;
+	case XFS_DIR3_LEAF1_MAGIC:
+		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+		break;
+	case XFS_DIR3_LEAFN_MAGIC:
+		bp->b_ops = &xfs_dir3_leafn_buf_ops;
+		break;
+	case XFS_DIR3_FREE_MAGIC:
+		bp->b_ops = &xfs_dir3_free_buf_ops;
+		break;
+	}
+
+	xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
+}
+
+/*
+ * Change the owner field of every block in the data fork to match the
+ * directory being repaired.
+ */
+STATIC int
+xrep_dir_swap_owner(
+	struct xfs_scrub		*sc)
+{
+	struct xfs_bmbt_irec		map;
+	struct xfs_da_geometry		*geo = sc->mp->m_dir_geo;
+	struct xfs_buf			*bp;
+	xfs_fileoff_t			offset = 0;
+	xfs_fileoff_t			end = XFS_MAX_FILEOFF;
+	xfs_dablk_t			dabno;
+	int				nmap;
+	int				error;
+
+	for (offset = 0;
+	     offset < end;
+	     offset = map.br_startoff + map.br_blockcount) {
+		nmap = 1;
+		error = xfs_bmapi_read(sc->tempip, offset, end - offset,
+				&map, &nmap, 0);
+		if (error)
+			return error;
+		if (nmap != 1)
+			return -EFSCORRUPTED;
+		if (!xfs_bmap_is_real_extent(&map))
+			continue;
+
+
+		for (dabno = round_up(map.br_startoff, geo->fsbcount);
+		     dabno < map.br_startoff + map.br_blockcount;
+		     dabno += geo->fsbcount) {
+			unsigned int	magic;
+
+			error = xfs_da_read_buf(sc->tp, sc->tempip,
+					dabno, 0, &bp, XFS_DATA_FORK, NULL);
+			if (error)
+				return error;
+			if (!bp)
+				return -EFSCORRUPTED;
+
+			error = xrep_dir_reset_owner(sc, dabno, bp, &magic);
+			if (error) {
+				xfs_trans_brelse(sc->tp, bp);
+				return error;
+			}
+
+			if (bp->b_ops == NULL)
+				xrep_dir_set_verifier(magic, bp);
+
+			xfs_trans_ordered_buf(sc->tp, bp);
+			xfs_trans_brelse(sc->tp, bp);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * If both files' directory structure are in short format, we can copy
+ * the short format data from the tempfile to the repaired file if it'll
+ * fit.
+ */
+STATIC void
+xrep_dir_swap_local(
+	struct xfs_scrub	*sc,
+	int			newsize)
+{
+	struct xfs_ifork	*ifp1, *ifp2;
+
+	ifp1 = XFS_IFORK_PTR(sc->tempip, XFS_DATA_FORK);
+	ifp2 = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK);
+
+	xfs_idata_realloc(sc->ip, ifp2->if_bytes - ifp1->if_bytes,
+			XFS_DATA_FORK);
+
+	memcpy(ifp2->if_u1.if_data, ifp1->if_u1.if_data, newsize);
+	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+}
+
+struct xfs_name xfs_name_dot = { (unsigned char *)".", 1, XFS_DIR3_FT_DIR };
+
+/* Swap the temporary directory's data fork with the one being repaired. */
+STATIC int
+xrep_dir_swap(
+	struct xrep_dir		*rd)
+{
+	struct xfs_scrub	*sc = rd->sc;
+	unsigned int		resblks;
+	bool			ip_local, temp_local;
+	int			error;
+
+	resblks = xfs_swap_range_calc_resblks(sc->tempip, sc->ip,
+			XFS_DATA_FORK, XFS_MAX_FILEOFF);
+	error = xchk_trans_alloc(sc, max(1U, resblks));
+	if (error)
+		return error;
+
+	/*
+	 * Lock and join the inodes to the tansaction so that transaction commit
+	 * or cancel will unlock the inodes from this point onwards.
+	 */
+	xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip,
+			XFS_ILOCK_EXCL);
+	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
+	sc->ilock_flags |= XFS_ILOCK_EXCL;
+	xfs_trans_ijoin(sc->tp, sc->ip, 0);
+	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+
+	/*
+	 * Reset the temporary directory's '.' entry to point to the directory
+	 * we're repairing.  Note: shortform directories lack the dot entry.
+	 *
+	 * It's possible that this replacement could also expand a sf tempdir
+	 * into block format.
+	 */
+	if (XFS_IFORK_FORMAT(sc->tempip, XFS_DATA_FORK) !=
+			XFS_DINODE_FMT_LOCAL) {
+		error = xfs_dir_replace(sc->tp, sc->tempip, &xfs_name_dot,
+				sc->ip->i_ino, resblks);
+		if (error)
+			return error;
+	}
+
+	/*
+	 * Reset the temporary directory's '..' entry to point to the parent
+	 * that we found.  The temporary directory was created with the root
+	 * directory as the parent, so we can skip this if repairing a
+	 * subdirectory of the root.
+	 *
+	 * It's also possible that this replacement could also expand a sf
+	 * tempdir into block format.
+	 */
+	if (rd->parent_ino != sc->mp->m_rootip->i_ino) {
+		error = xfs_dir_replace(sc->tp, rd->sc->tempip,
+				&xfs_name_dotdot, rd->parent_ino, resblks);
+		if (error)
+			return error;
+	}
+
+	/* XXX: do we need to roll the transaction here? */
+
+	/*
+	 * Changing the dot and dotdot entries could have changed the shape of
+	 * the directory, so we recompute these.
+	 */
+	ip_local = XFS_IFORK_FORMAT(sc->ip, XFS_DATA_FORK) ==
+				XFS_DINODE_FMT_LOCAL;
+	temp_local = XFS_IFORK_FORMAT(sc->tempip, XFS_DATA_FORK) ==
+				XFS_DINODE_FMT_LOCAL;
+
+	/*
+	 * If the both files have a local format data fork and the rebuilt
+	 * directory data would fit in the repaired file's data fork, copy
+	 * the contents from the tempfile and declare ourselves done.
+	 */
+	if (ip_local && temp_local) {
+		if (sc->tempip->i_d.di_size <= XFS_IFORK_DSIZE(sc->ip)) {
+			xrep_dir_swap_local(sc, sc->tempip->i_d.di_size);
+			set_nlink(VFS_I(sc->ip), rd->new_nlink);
+			return 0;
+		}
+	}
+
+	/* Otherwise, make sure both data forks are in block-mapping mode. */
+	error = xrep_dir_swap_prep(sc, temp_local, ip_local);
+	if (error)
+		return error;
+
+	/* Rewrite the owner field of all attr blocks in the temporary file. */
+	error = xrep_dir_swap_owner(sc);
+	if (error)
+		return error;
+
+	/*
+	 * Set nlink of the directory under repair to the number of
+	 * subdirectories that will be in the new directory data.  Do this in
+	 * the same transaction sequence that (atomically) commits the new
+	 * data.
+	 */
+	set_nlink(VFS_I(sc->ip), rd->new_nlink);
+
+	return xfs_swapext_atomic(&sc->tp, sc->tempip, sc->ip, XFS_DATA_FORK,
+			0, 0, NULLFILEOFF,
+			XFS_SWAPEXT_SET_SIZES | XFS_SWAPEXT_TO_SHORTFORM2);
+}
+
 /*
  * Insert all the attributes that we collected.
  *
@@ -669,6 +1024,10 @@  xrep_dir_rebuild_tree(
 	if (error)
 		return error;
 
+	/*
+	 * Drop the ILOCK so that we don't pin the tail of the log.  We still
+	 * hold the IOLOCK (aka i_rwsem) which will prevent directory access.
+	 */
 	xfs_iunlock(rd->sc->ip, XFS_ILOCK_EXCL);
 	rd->sc->ilock_flags &= ~XFS_ILOCK_EXCL;
 
@@ -680,8 +1039,30 @@  xrep_dir_rebuild_tree(
 	if (error)
 		return error;
 
-	/* Re-add every entry to the directory. */
-	return xfbma_iter_del(rd->dir_entries, xrep_dir_insert_rec, rd);
+	/* Re-add every entry to the temporary directory. */
+	error = xfbma_iter_del(rd->dir_entries, xrep_dir_insert_rec, rd);
+	if (error)
+		return error;
+
+	/* Swap the tempdir's data fork with the file being repaired. */
+	error = xrep_dir_swap(rd);
+	if (error)
+		return error;
+
+	/*
+	 * Now reset the data fork of the temp directory to an empty shortform
+	 * directory because inactivation does nothing for directories.  We're
+	 * done with the inode that we want to repair, so roll the transaction
+	 * and drop its ILOCK before we tackle the temporary file.
+	 */
+	error = xfs_trans_roll_inode(&rd->sc->tp, rd->sc->tempip);
+	if (error)
+		return error;
+	xfs_iunlock(rd->sc->ip, XFS_ILOCK_EXCL);
+	rd->sc->ilock_flags &= ~XFS_ILOCK_EXCL;
+
+	return xrep_dir_reset_fork(rd->sc, rd->sc->tempip,
+			rd->sc->mp->m_rootip->i_ino);
 }
 
 /*
@@ -821,6 +1202,7 @@  xrep_dir(
 	struct xrep_dir		rd = {
 		.sc		= sc,
 		.parent_ino	= NULLFSINO,
+		.new_nlink	= 2,
 	};
 	int			error;
 
@@ -860,17 +1242,6 @@  xrep_dir(
 	if (error)
 		goto out;
 
-	/*
-	 * Invalidate and truncate all data fork extents.  This is the point at
-	 * which we are no longer able to bail out gracefully.  We commit the
-	 * transaction here because the rebuilding step allocates its own
-	 * transactions.
-	 */
-	xfs_trans_ijoin(sc->tp, sc->ip, 0);
-	error = xrep_dir_reset_fork(sc, sc->ip, rd.parent_ino);
-	if (error)
-		goto out;
-
 	/* Now rebuild the directory information. */
 	error = xrep_dir_rebuild_tree(&rd);
 out:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 080c8838fba5..301fb9afbfde 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -17,6 +17,7 @@ 
 #include "xfs_inode.h"
 #include "xfs_dir2.h"
 #include "xfs_attr.h"
+#include "xfs_bit.h"
 #include "xfs_trans_space.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
@@ -1292,6 +1293,49 @@  xfs_release(
 	return 0;
 }
 
+/*
+ * Mark all the buffers attached to this directory stale.  In theory we should
+ * never be freeing a directory with any blocks at all, but this covers the
+ * case where we've recovered a directory swap with a "temporary" directory
+ * created by online repair and now need to dump it.
+ */
+STATIC void
+xfs_inactive_dir(
+	struct xfs_inode	*dp)
+{
+	struct xfs_iext_cursor	icur;
+	struct xfs_bmbt_irec	got;
+	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_da_geometry	*geo = mp->m_dir_geo;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK);
+	struct xfs_buf		*bp;
+	xfs_fileoff_t		off;
+
+	/*
+	 * Invalidate each directory block.  All directory blocks are of
+	 * fsbcount length and alignment, so we only need to walk those same
+	 * offsets.  We hold the only reference to this inode, so we must wait
+	 * for the buffer locks.
+	 */
+	for_each_xfs_iext(ifp, &icur, &got) {
+		for (off = round_up(got.br_startoff, geo->fsbcount);
+		     off < got.br_startoff + got.br_blockcount;
+		     off += geo->fsbcount) {
+			xfs_fsblock_t	fsbno;
+
+			fsbno = (off - got.br_startoff) + got.br_startblock;
+			bp = xfs_buf_incore(mp->m_ddev_targp,
+					XFS_FSB_TO_DADDR(mp, fsbno),
+					XFS_FSB_TO_BB(mp, geo->fsbcount),
+					XBF_SCAN_STALE);
+			if (bp) {
+				xfs_buf_stale(bp);
+				xfs_buf_relse(bp);
+			}
+		}
+	}
+}
+
 /*
  * xfs_inactive_truncate
  *
@@ -1694,6 +1738,11 @@  xfs_inactive(
 	     ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
 		truncate = 1;
 
+	if (S_ISDIR(VFS_I(ip)->i_mode) && ip->i_d.di_nextents > 0) {
+		xfs_inactive_dir(ip);
+		truncate = 1;
+	}
+
 	if (S_ISLNK(VFS_I(ip)->i_mode))
 		error = xfs_inactive_symlink(ip);
 	else if (truncate)