diff mbox series

[v4,09/10] xfs: Implement ->corrupted_range() for XFS

Message ID 20210604011844.1756145-10-ruansy.fnst@fujitsu.com (mailing list archive)
State Superseded
Headers show
Series fsdax: introduce fs query to support reflink | expand

Commit Message

Shiyang Ruan June 4, 2021, 1:18 a.m. UTC
This function is used to handle errors which may cause data loss in the
filesystem, such as a memory failure in fsdax mode.

If the rmap feature of XFS is enabled, we can query it to find the files
and metadata which are associated with the corrupt data.  For now all we
do is kill processes that have such a file mapped into their address
spaces, but future patches could actually do something about corrupt
metadata.

After that, the memory failure handler needs to notify the processes that
are using those files.

Only the data device is supported.  The realtime device is not supported for now.

Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
---
 fs/xfs/xfs_fsops.c |   5 +++
 fs/xfs/xfs_mount.h |   1 +
 fs/xfs/xfs_super.c | 108 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 114 insertions(+)

Comments

kernel test robot June 4, 2021, 5:22 a.m. UTC | #1
Hi Shiyang,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on xfs-linux/for-next]
[also build test ERROR on dm/for-next linus/master v5.13-rc4]
[cannot apply to hnaz-linux-mm/master next-20210603]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Shiyang-Ruan/fsdax-introduce-fs-query-to-support-reflink/20210604-092105
base:   https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next
config: csky-randconfig-r014-20210604 (attached as .config)
compiler: csky-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/8fc6cb02d396487fa3a77fb57f23dcdc978dd3e3
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Shiyang-Ruan/fsdax-introduce-fs-query-to-support-reflink/20210604-092105
        git checkout 8fc6cb02d396487fa3a77fb57f23dcdc978dd3e3
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=csky 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   fs/xfs/xfs_super.c: In function 'xfs_open_devices':
>> fs/xfs/xfs_super.c:400:2: error: implicit declaration of function 'dax_set_holder'; did you mean 'xas_set_order'? [-Werror=implicit-function-declaration]
     400 |  dax_set_holder(dax_ddev, mp->m_super, &fs_dax_holder_ops);
         |  ^~~~~~~~~~~~~~
         |  xas_set_order
   cc1: some warnings being treated as errors


vim +400 fs/xfs/xfs_super.c

   379	
   380	/*
   381	 * The file system configurations are:
   382	 *	(1) device (partition) with data and internal log
   383	 *	(2) logical volume with data and log subvolumes.
   384	 *	(3) logical volume with data, log, and realtime subvolumes.
   385	 *
   386	 * We only have to handle opening the log and realtime volumes here if
   387	 * they are present.  The data subvolume has already been opened by
   388	 * get_sb_bdev() and is stored in sb->s_bdev.
   389	 */
   390	STATIC int
   391	xfs_open_devices(
   392		struct xfs_mount	*mp)
   393	{
   394		struct block_device	*ddev = mp->m_super->s_bdev;
   395		struct dax_device	*dax_ddev = fs_dax_get_by_bdev(ddev);
   396		struct dax_device	*dax_logdev = NULL, *dax_rtdev = NULL;
   397		struct block_device	*logdev = NULL, *rtdev = NULL;
   398		int			error;
   399	
 > 400		dax_set_holder(dax_ddev, mp->m_super, &fs_dax_holder_ops);
   401		/*
   402		 * Open real time and log devices - order is important.
   403		 */
   404		if (mp->m_logname) {
   405			error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
   406			if (error)
   407				goto out;
   408			dax_logdev = fs_dax_get_by_bdev(logdev);
   409			if (dax_logdev != dax_ddev)
   410				dax_set_holder(dax_logdev, mp->m_super,
   411					       &fs_dax_holder_ops);
   412		}
   413	
   414		if (mp->m_rtname) {
   415			error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
   416			if (error)
   417				goto out_close_logdev;
   418	
   419			if (rtdev == ddev || rtdev == logdev) {
   420				xfs_warn(mp,
   421		"Cannot mount filesystem with identical rtdev and ddev/logdev.");
   422				error = -EINVAL;
   423				goto out_close_rtdev;
   424			}
   425			dax_rtdev = fs_dax_get_by_bdev(rtdev);
   426			dax_set_holder(dax_rtdev, mp->m_super, &fs_dax_holder_ops);
   427		}
   428	
   429		/*
   430		 * Setup xfs_mount buffer target pointers
   431		 */
   432		error = -ENOMEM;
   433		mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
   434		if (!mp->m_ddev_targp)
   435			goto out_close_rtdev;
   436	
   437		if (rtdev) {
   438			mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
   439			if (!mp->m_rtdev_targp)
   440				goto out_free_ddev_targ;
   441		}
   442	
   443		if (logdev && logdev != ddev) {
   444			mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
   445			if (!mp->m_logdev_targp)
   446				goto out_free_rtdev_targ;
   447		} else {
   448			mp->m_logdev_targp = mp->m_ddev_targp;
   449		}
   450	
   451		return 0;
   452	
   453	 out_free_rtdev_targ:
   454		if (mp->m_rtdev_targp)
   455			xfs_free_buftarg(mp->m_rtdev_targp);
   456	 out_free_ddev_targ:
   457		xfs_free_buftarg(mp->m_ddev_targp);
   458	 out_close_rtdev:
   459		xfs_blkdev_put(rtdev);
   460		fs_put_dax(dax_rtdev);
   461	 out_close_logdev:
   462		if (logdev && logdev != ddev) {
   463			xfs_blkdev_put(logdev);
   464			fs_put_dax(dax_logdev);
   465		}
   466	 out:
   467		fs_put_dax(dax_ddev);
   468		return error;
   469	}
   470	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot June 4, 2021, 5:40 a.m. UTC | #2
Hi Shiyang,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on xfs-linux/for-next]
[also build test ERROR on dm/for-next linus/master v5.13-rc4]
[cannot apply to hnaz-linux-mm/master next-20210603]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Shiyang-Ruan/fsdax-introduce-fs-query-to-support-reflink/20210604-092105
base:   https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next
config: parisc-randconfig-r016-20210604 (attached as .config)
compiler: hppa-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/8fc6cb02d396487fa3a77fb57f23dcdc978dd3e3
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Shiyang-Ruan/fsdax-introduce-fs-query-to-support-reflink/20210604-092105
        git checkout 8fc6cb02d396487fa3a77fb57f23dcdc978dd3e3
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=parisc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   hppa-linux-ld: fs/xfs/xfs_super.o: in function `xfs_open_devices':
>> (.text+0x1e94): undefined reference to `fs_dax_holder_ops'
>> hppa-linux-ld: (.text+0x1e98): undefined reference to `fs_dax_holder_ops'
   hppa-linux-ld: (.text+0x1ff0): undefined reference to `fs_dax_holder_ops'
   hppa-linux-ld: fs/xfs/xfs_super.o: in function `.LC45':
>> (.rodata.cst4+0x70): undefined reference to `mf_dax_kill_procs'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index be9cf88d2ad7..e89ada33d8fc 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -551,6 +551,11 @@  xfs_do_force_shutdown(
 "Corruption of in-memory data detected.  Shutting down filesystem");
 		if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
 			xfs_stack_trace();
+	} else if (flags & SHUTDOWN_CORRUPT_META) {
+		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
+"Corruption of on-disk metadata detected.  Shutting down filesystem");
+		if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
+			xfs_stack_trace();
 	} else if (logerror) {
 		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
 			"Log I/O Error Detected. Shutting down filesystem");
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bb67274ee23f..c62ccf3e07d0 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -276,6 +276,7 @@  void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
 #define SHUTDOWN_LOG_IO_ERROR	0x0002	/* write attempt to the log failed */
 #define SHUTDOWN_FORCE_UMOUNT	0x0004	/* shutdown from a forced unmount */
 #define SHUTDOWN_CORRUPT_INCORE	0x0008	/* corrupt in-memory data structures */
+#define SHUTDOWN_CORRUPT_META	0x0010  /* corrupt metadata on device */
 
 /*
  * Flags for xfs_mountfs
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index a2dab05332ac..498edaeb8363 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -36,6 +36,11 @@ 
 #include "xfs_bmap_item.h"
 #include "xfs_reflink.h"
 #include "xfs_pwork.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_rtalloc.h"
+#include "xfs_bit.h"
 
 #include <linux/magic.h>
 #include <linux/fs_context.h>
@@ -392,6 +397,7 @@  xfs_open_devices(
 	struct block_device	*logdev = NULL, *rtdev = NULL;
 	int			error;
 
+	dax_set_holder(dax_ddev, mp->m_super, &fs_dax_holder_ops);
 	/*
 	 * Open real time and log devices - order is important.
 	 */
@@ -400,6 +406,9 @@  xfs_open_devices(
 		if (error)
 			goto out;
 		dax_logdev = fs_dax_get_by_bdev(logdev);
+		if (dax_logdev != dax_ddev)
+			dax_set_holder(dax_logdev, mp->m_super,
+				       &fs_dax_holder_ops);
 	}
 
 	if (mp->m_rtname) {
@@ -414,6 +423,7 @@  xfs_open_devices(
 			goto out_close_rtdev;
 		}
 		dax_rtdev = fs_dax_get_by_bdev(rtdev);
+		dax_set_holder(dax_rtdev, mp->m_super, &fs_dax_holder_ops);
 	}
 
 	/*
@@ -1076,6 +1086,103 @@  xfs_fs_free_cached_objects(
 	return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
 }
 
+/*
+ * rmap query callback for one record overlapping the corrupted range.  Metadata
+ * owners shut the fs down; for file data, processes mapping the file are killed
+ * via mf_dax_kill_procs(), with @data pointing at the int flags to pass on.
+ */
+static int
+xfs_corrupt_helper(
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*data)
+{
+	struct xfs_inode		*ip;
+	int				*flags = data;
+	int				rc;
+
+	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
+	    (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))) {
+		// TODO check and try to fix metadata
+		xfs_force_shutdown(cur->bc_mp, SHUTDOWN_CORRUPT_META);
+		return -EFSCORRUPTED;
+	}
+
+	/* Only in-core inodes matter; -ENODATA means not in core: skip it. */
+	rc = xfs_iget(cur->bc_mp, cur->bc_tp, rec->rm_owner, XFS_IGET_INCORE,
+			0, &ip);
+	if (rc)
+		return rc == -ENODATA ? 0 : rc;
+
+	rc = mf_dax_kill_procs(VFS_I(ip)->i_mapping, rec->rm_offset, *flags);
+
+	// TODO try to fix data
+	xfs_irele(ip);
+	return rc;
+}
+
+/*
+ * ->corrupted_range(): pass the rmapbt owners of a bad data-device byte range
+ * to xfs_corrupt_helper().  NOTE(review): only the first AG is searched.
+ */
+static int
+xfs_fs_corrupted_range(
+	struct super_block	*sb,
+	struct block_device	*bdev,
+	loff_t			offset,
+	size_t			len,
+	void			*data)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+	struct xfs_trans	*tp = NULL;
+	struct xfs_btree_cur	*cur = NULL;
+	struct xfs_rmap_irec	rmap_low, rmap_high;
+	struct xfs_buf		*agf_bp = NULL;
+	xfs_fsblock_t		fsbno = XFS_B_TO_FSBT(mp, offset);
+	xfs_filblks_t		bcnt = XFS_B_TO_FSB(mp, offset + len) - fsbno;
+	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, fsbno);
+	xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
+	int			error = 0;
+
+	if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_bdev == bdev) {
+		xfs_warn(mp, "corrupted_range support not available for realtime device!");
+		return -EOPNOTSUPP;
+	}
+	if (mp->m_logdev_targp && mp->m_logdev_targp->bt_bdev == bdev &&
+	    mp->m_logdev_targp != mp->m_ddev_targp) {
+		xfs_err(mp, "ondisk log corrupt, shutting down fs!");
+		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_META);
+		return -EFSCORRUPTED;
+	}
+
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+		xfs_warn(mp, "corrupted_range needs rmapbt enabled!");
+		return -EOPNOTSUPP;
+	}
+
+	error = xfs_trans_alloc_empty(mp, &tp);
+	if (error)
+		return error;
+	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agf_bp);
+	if (error)
+		goto out_cancel_tp;
+	cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, agno);
+
+	/* Query keys: first through last bad block, any owner, any offset. */
+	memset(&rmap_low, 0, sizeof(rmap_low));
+	memset(&rmap_high, 0xFF, sizeof(rmap_high));
+	rmap_low.rm_startblock = agbno;
+	rmap_high.rm_startblock = bcnt ? agbno + bcnt - 1 : agbno;
+	rmap_low.rm_blockcount = rmap_high.rm_blockcount = bcnt;
+	error = xfs_rmap_query_range(cur, &rmap_low, &rmap_high,
+				     xfs_corrupt_helper, data);
+	xfs_btree_del_cursor(cur, error);
+	xfs_trans_brelse(tp, agf_bp);
+out_cancel_tp:
+	xfs_trans_cancel(tp);
+	return error;
+}
+
 static const struct super_operations xfs_super_operations = {
 	.alloc_inode		= xfs_fs_alloc_inode,
 	.destroy_inode		= xfs_fs_destroy_inode,
@@ -1089,6 +1196,7 @@  static const struct super_operations xfs_super_operations = {
 	.show_options		= xfs_fs_show_options,
 	.nr_cached_objects	= xfs_fs_nr_cached_objects,
 	.free_cached_objects	= xfs_fs_free_cached_objects,
+	.corrupted_range	= xfs_fs_corrupted_range,
 };
 
 static int