diff mbox series

[16/21] xfs: improve detection of lost xfile contents

Message ID 20240126132903.2700077-17-hch@lst.de (mailing list archive)
State New
Headers show
Series [01/21] mm: move mapping_set_update out of <linux/swap.h> | expand

Commit Message

Christoph Hellwig Jan. 26, 2024, 1:28 p.m. UTC
From: "Darrick J. Wong" <djwong@kernel.org>

shmem files are weird animals with respect to figuring out if we've lost
any data.  The memory failure code can set HWPoison on a page, but it
also sets writeback errors on the file mapping.  Replace the twisty
multi-line if logic with a single helper that looks in all the places
that I know of where memory errors can show up.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/scrub/xfile.c | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

Comments

Matthew Wilcox Jan. 26, 2024, 4:33 p.m. UTC | #1
On Fri, Jan 26, 2024 at 02:28:58PM +0100, Christoph Hellwig wrote:
> +/* Has this file lost any of the data stored in it? */
> +static inline bool
> +xfile_has_lost_data(
> +	struct inode		*inode,
> +	struct folio		*folio)
> +{
> +	struct address_space	*mapping = inode->i_mapping;
> +
> +	/* This folio itself has been poisoned. */
> +	if (folio_test_hwpoison(folio))
> +		return true;
> +
> +	/* A base page under this large folio has been poisoned. */
> +	if (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))
> +		return true;
> +
> +	/* Data loss has occurred anywhere in this shmem file. */
> +	if (test_bit(AS_EIO, &mapping->flags))
> +		return true;
> +	if (filemap_check_wb_err(mapping, 0))
> +		return true;
> +
> +	return false;
> +}

This is too much.  filemap_check_wb_err() will do just fine for your
needs unless you really want to get fine-grained and perhaps try to
reconstruct the contents of the file.
Darrick J. Wong Jan. 26, 2024, 4:50 p.m. UTC | #2
On Fri, Jan 26, 2024 at 04:33:40PM +0000, Matthew Wilcox wrote:
> On Fri, Jan 26, 2024 at 02:28:58PM +0100, Christoph Hellwig wrote:
> > +/* Has this file lost any of the data stored in it? */
> > +static inline bool
> > +xfile_has_lost_data(
> > +	struct inode		*inode,
> > +	struct folio		*folio)
> > +{
> > +	struct address_space	*mapping = inode->i_mapping;
> > +
> > +	/* This folio itself has been poisoned. */
> > +	if (folio_test_hwpoison(folio))
> > +		return true;
> > +
> > +	/* A base page under this large folio has been poisoned. */
> > +	if (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))
> > +		return true;
> > +
> > +	/* Data loss has occurred anywhere in this shmem file. */
> > +	if (test_bit(AS_EIO, &mapping->flags))
> > +		return true;
> > +	if (filemap_check_wb_err(mapping, 0))
> > +		return true;
> > +
> > +	return false;
> > +}
> 
> This is too much.  filemap_check_wb_err() will do just fine for your
> needs unless you really want to get fine-grained and perhaps try to
> reconstruct the contents of the file.

Hah no, we're not going to implement online fsck for xfiles. ;)

Online fsck might be a little special in that any data loss anywhere in
an xfile needs to result in the repair aborting without touching the
ondisk metadata, and the sooner we realise this the better.

--D
Christoph Hellwig Jan. 28, 2024, 4:55 p.m. UTC | #3
On Fri, Jan 26, 2024 at 04:33:40PM +0000, Matthew Wilcox wrote:
> > +static inline bool
> > +xfile_has_lost_data(
> > +	struct inode		*inode,
> > +	struct folio		*folio)
> > +{
> > +	struct address_space	*mapping = inode->i_mapping;
> > +
> > +	/* This folio itself has been poisoned. */
> > +	if (folio_test_hwpoison(folio))
> > +		return true;
> > +
> > +	/* A base page under this large folio has been poisoned. */
> > +	if (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))
> > +		return true;
> > +
> > +	/* Data loss has occurred anywhere in this shmem file. */
> > +	if (test_bit(AS_EIO, &mapping->flags))
> > +		return true;
> > +	if (filemap_check_wb_err(mapping, 0))
> > +		return true;
> > +
> > +	return false;
> > +}
> 
> This is too much.  filemap_check_wb_err() will do just fine for your
> needs unless you really want to get fine-grained and perhaps try to
> reconstruct the contents of the file.

As in only call filemap_check_wb_err and do away with all the
hwpoisoned checks and the extra AS_EIO check?
Matthew Wilcox Jan. 28, 2024, 8:33 p.m. UTC | #4
On Sun, Jan 28, 2024 at 05:55:49PM +0100, Christoph Hellwig wrote:
> On Fri, Jan 26, 2024 at 04:33:40PM +0000, Matthew Wilcox wrote:
> > > +static inline bool
> > > +xfile_has_lost_data(
> > > +	struct inode		*inode,
> > > +	struct folio		*folio)
> > > +{
> > > +	struct address_space	*mapping = inode->i_mapping;
> > > +
> > > +	/* This folio itself has been poisoned. */
> > > +	if (folio_test_hwpoison(folio))
> > > +		return true;
> > > +
> > > +	/* A base page under this large folio has been poisoned. */
> > > +	if (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))
> > > +		return true;
> > > +
> > > +	/* Data loss has occurred anywhere in this shmem file. */
> > > +	if (test_bit(AS_EIO, &mapping->flags))
> > > +		return true;
> > > +	if (filemap_check_wb_err(mapping, 0))
> > > +		return true;
> > > +
> > > +	return false;
> > > +}
> > 
> > This is too much.  filemap_check_wb_err() will do just fine for your
> > needs unless you really want to get fine-grained and perhaps try to
> > reconstruct the contents of the file.
> 
> As in only call filemap_check_wb_err and do away with all the
> hwpoisoned checks and the extra AS_EIO check?

Yes, that's what i meant.
diff mbox series

Patch

diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
index 077f9ce6e81409..2d802c20a8ddfe 100644
--- a/fs/xfs/scrub/xfile.c
+++ b/fs/xfs/scrub/xfile.c
@@ -99,6 +99,31 @@  xfile_destroy(
 	kfree(xf);
 }
 
+/* Has this file lost any of the data stored in it? */
+static inline bool
+xfile_has_lost_data(
+	struct inode		*inode,
+	struct folio		*folio)
+{
+	struct address_space	*mapping = inode->i_mapping;
+
+	/* This folio itself has been poisoned. */
+	if (folio_test_hwpoison(folio))
+		return true;
+
+	/* A base page under this large folio has been poisoned. */
+	if (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))
+		return true;
+
+	/* Data loss has occurred anywhere in this shmem file. */
+	if (test_bit(AS_EIO, &mapping->flags))
+		return true;
+	if (filemap_check_wb_err(mapping, 0))
+		return true;
+
+	return false;
+}
+
 /*
  * Load an object.  Since we're treating this file as "memory", any error or
  * short IO is treated as a failure to allocate memory.
@@ -138,9 +163,7 @@  xfile_load(
 				PAGE_SIZE - offset_in_page(pos));
 			memset(buf, 0, len);
 		} else {
-			if (folio_test_hwpoison(folio) ||
-			    (folio_test_large(folio) &&
-			     folio_test_has_hwpoisoned(folio))) {
+			if (xfile_has_lost_data(inode, folio)) {
 				folio_unlock(folio);
 				folio_put(folio);
 				break;
@@ -201,9 +224,7 @@  xfile_store(
 		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
 				SGP_CACHE) < 0)
 			break;
-		if (folio_test_hwpoison(folio) ||
-		    (folio_test_large(folio) &&
-		     folio_test_has_hwpoisoned(folio))) {
+		if (xfile_has_lost_data(inode, folio)) {
 			folio_unlock(folio);
 			folio_put(folio);
 			break;