diff mbox series

[v4] btrfs: add fs state details to error messages.

Message ID a059920460fe13f773fd9a2e870ceb9a8e3a105a.1645644489.git.sweettea-kernel@dorminy.me (mailing list archive)
State New, archived
Headers show
Series [v4] btrfs: add fs state details to error messages. | expand

Commit Message

Sweet Tea Dorminy Feb. 23, 2022, 7:38 p.m. UTC
When a filesystem goes read-only due to an error, multiple errors tend
to be reported, some of which are knock-on failures. Logging fs_states,
in btrfs_handle_fs_error() and btrfs_printk() helps distinguish the
first error from subsequent messages which may only exist due to an
error state.

Under the new format, most initial errors will look like:
`BTRFS: error (device loop0) in ...`
while subsequent errors will begin with:
`error (device loop0: state E) in ...`

An initial transaction abort error will look like
`error (device loop0: state A) in ...`
and subsequent messages will contain
`(device loop0: state EA) in ...`

Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
---
v4:
  - Adjusted state translation table to contain chars instead of
    strings.

v3:
  - Reworked btrfs_state_to_string to use an array mapping all states
    to various error chars, or nothing, explicitly. Added error logging
    for more states, as requested.
  - Consolidated buffer length definition
  - ttps://lore.kernel.org/linux-btrfs/8a2a73ab4b48a4e73d24cf7f10cc0fe245d50a84.1645562216.git.sweettea-kernel@dorminy.me/

v2: 
  - Changed btrfs_state_to_string() for clarity
  - Removed superfluous whitespace change
  - https://lore.kernel.org/linux-btrfs/084c136c6bb2d20ca0e91af7ded48306d52bb910.1645210326.git.sweettea-kernel@dorminy.me/

v1:
  - https://lore.kernel.org/linux-btrfs/20220212191042.94954-1-sweettea-kernel@dorminy.me/

 fs/btrfs/ctree.h |  2 ++
 fs/btrfs/super.c | 62 +++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 56 insertions(+), 8 deletions(-)

Comments

David Sterba Feb. 24, 2022, 1:22 p.m. UTC | #1
On Wed, Feb 23, 2022 at 02:38:06PM -0500, Sweet Tea Dorminy wrote:
> When a filesystem goes read-only due to an error, multiple errors tend
> to be reported, some of which are knock-on failures. Logging fs_states,
> in btrfs_handle_fs_error() and btrfs_printk() helps distinguish the
> first error from subsequent messages which may only exist due to an
> error state.
> 
> Under the new format, most initial errors will look like:
> `BTRFS: error (device loop0) in ...`
> while subsequent errors will begin with:
> `error (device loop0: state E) in ...`
> 
> An initial transaction abort error will look like
> `error (device loop0: state A) in ...`
> and subsequent messages will contain
> `(device loop0: state EA) in ...`
> 
> Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>

Added to misc-next with some minor updates, thanks.

> ---
> v4:
>   - Adjusted state translation table to contain chars instead of
>     strings.
> 
> v3:
>   - Reworked btrfs_state_to_string to use an array mapping all states
>     to various error chars, or nothing, explicitly. Added error logging
>     for more states, as requested.
>   - Consolidated buffer length definition
>   - ttps://lore.kernel.org/linux-btrfs/8a2a73ab4b48a4e73d24cf7f10cc0fe245d50a84.1645562216.git.sweettea-kernel@dorminy.me/
> 
> v2: 
>   - Changed btrfs_state_to_string() for clarity
>   - Removed superfluous whitespace change
>   - https://lore.kernel.org/linux-btrfs/084c136c6bb2d20ca0e91af7ded48306d52bb910.1645210326.git.sweettea-kernel@dorminy.me/
> 
> v1:
>   - https://lore.kernel.org/linux-btrfs/20220212191042.94954-1-sweettea-kernel@dorminy.me/
> 
>  fs/btrfs/ctree.h |  2 ++
>  fs/btrfs/super.c | 62 +++++++++++++++++++++++++++++++++++++++++-------
>  2 files changed, 56 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 8992e0096163..3db337cd015a 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -148,6 +148,8 @@ enum {
>  
>  	/* Indicates there was an error cleaning up a log tree. */
>  	BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
> +
> +	BTRFS_FS_STATE_COUNT,
>  };
>  
>  #define BTRFS_BACKREF_REV_MAX		256
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 4d947ba32da9..7ef6a3e494d0 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -66,6 +66,46 @@ static struct file_system_type btrfs_root_fs_type;
>  
>  static int btrfs_remount(struct super_block *sb, int *flags, char *data);
>  
> +#define STATE_STRING_PREFACE ": state "
> +#define STATE_STRING_BUF_LEN \
> +	(sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
> +
> +/* Characters to print to indicate error conditions. RO is not an error. */
> +static const char fs_state_chars[] = {
> +	[BTRFS_FS_STATE_ERROR]			= 'E',
> +	[BTRFS_FS_STATE_REMOUNTING]		= 'M',
> +	[BTRFS_FS_STATE_RO]			= 0,
> +	[BTRFS_FS_STATE_TRANS_ABORTED]		= 'A',
> +	[BTRFS_FS_STATE_DEV_REPLACING]		= 'P',
> +	[BTRFS_FS_STATE_DUMMY_FS_INFO]		= 0,
> +	[BTRFS_FS_STATE_NO_CSUMS]		= 0,

I've added 'C' for this, as it's an interesting state (data checksums
not verified) and renamed the device replace to 'R'.

> +	[BTRFS_FS_STATE_LOG_CLEANUP_ERROR]	= 'L',
> +};
> +
> +static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
> +{
> +	unsigned int bit;
> +	unsigned int states_printed = 0;

This could be a simple bool indicator.

> +	char *curr = buf;
> +
> +	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
> +	curr += sizeof(STATE_STRING_PREFACE) - 1;
> +
> +	for_each_set_bit(bit, &info->fs_state, sizeof(info->fs_state)) {
> +		WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
> +		if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
> +			*curr++ = fs_state_chars[bit];
> +			states_printed++;
> +		}
> +	}
> +
> +	/* If no states were printed, reset the buffer */
> +	if (!states_printed)
> +		curr = buf;
> +
> +	*curr++ = '\0';
> +}
Sweet Tea Dorminy Feb. 24, 2022, 4:10 p.m. UTC | #2
> Added to misc-next with some minor updates, thanks.
>
Awesome, thank you. I realized this morning that it might be technically 
slightly racy actually and would propose something like the following

static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
{
	unsigned int bit;
+	unsigned long fs_state = READ_ONCE(info->fs_state);
	unsigned int states_printed = 0;
	char *curr = buf;

	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
	curr += sizeof(STATE_STRING_PREFACE) - 1;

-	for_each_set_bit(bit, &info->fs_state, sizeof(info->fs_state)) {
+	for_each_set_bit(bit, fs_state, sizeof(fs_state)) {


All the other interactions with info->fs_state are test/set/clear_bit, which treat the argument as volatile and are therefore safe to do from multiple threads. Without the READ_ONCE (reading it as a volatile), the compiler or cpu could turn the reads of info->fs_state in for_each_set_bit() into writes of random stuff into info->fs_state, potentially clearing the state bits or filling them with garbage.

Even if this is right, it'd be rare, but it would be exceeding weird for a message to be logged listing an error and then future messages be logged without any such state, or with a random collection of garbage states.

I can send another patch if this explanation seems reasonable and you'd like it separate; or maybe this is too unlikely to worry about. Thanks again for the review!

Sweet Tea
David Sterba Feb. 24, 2022, 6:42 p.m. UTC | #3
On Thu, Feb 24, 2022 at 11:10:59AM -0500, Sweet Tea Dorminy wrote:
> Awesome, thank you. I realized this morning that it might be technically 
> slightly racy actually and would propose something like the following
> 
> static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
> {
> 	unsigned int bit;
> +	unsigned long fs_state = READ_ONCE(info->fs_state);
> 	unsigned int states_printed = 0;
> 	char *curr = buf;
> 
> 	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
> 	curr += sizeof(STATE_STRING_PREFACE) - 1;
> 
> -	for_each_set_bit(bit, &info->fs_state, sizeof(info->fs_state)) {
> +	for_each_set_bit(bit, fs_state, sizeof(fs_state)) {
> 
> 
> All the other interactions with info->fs_state are test/set/clear_bit,
> which treat the argument as volatile and are therefore safe to do from
> multiple threads. Without the READ_ONCE (reading it as a volatile),
> the compiler or cpu could turn the reads of info->fs_state in
> for_each_set_bit() into writes of random stuff into info->fs_state,
> potentially clearing the state bits or filling them with garbage.

I'm not sure I'm missing something, but I find the above hard to
believe. Concurrent access to a variable from multiple threads may not
produce consistent results, but random writes should not happen when
we're just reading. The worst thing that could happen is a missing
status bit reported, which is not a problem.

> Even if this is right, it'd be rare, but it would be exceeding weird
> for a message to be logged listing an error and then future messages
> be logged without any such state, or with a random collection of
> garbage states.

How would that happen? The volatile keyword is only a compiler hint not
to do optimizations on the variable, what actually happens on the CPU
level depends if the instruction is locked or not, so different threads
may read different bits.
You seem to imply that once a variable is not used with volatile
semantics, even just for read, the result could lead to random writes
because it's otherwise undefined.
Sweet Tea Dorminy Feb. 24, 2022, 8:09 p.m. UTC | #4
>> All the other interactions with info->fs_state are test/set/clear_bit,
>> which treat the argument as volatile and are therefore safe to do from
>> multiple threads. Without the READ_ONCE (reading it as a volatile),
>> the compiler or cpu could turn the reads of info->fs_state in
>> for_each_set_bit() into writes of random stuff into info->fs_state,
>> potentially clearing the state bits or filling them with garbage.
> I'm not sure I'm missing something, but I find the above hard to
> believe. Concurrent access to a variable from multiple threads may not
> produce consistent results, but random writes should not happen when
> we're just reading.

Maybe I've been reading too many articles about the things compilers are 
technically allowed to do. But as per the following link, the C standard 
does permit compilers inventing writes except to atomics and volatiles: 
https://lwn.net/Articles/793253/#Invented%20Stores

>
>> Even if this is right, it'd be rare, but it would be exceeding weird
>> for a message to be logged listing an error and then future messages
>> be logged without any such state, or with a random collection of
>> garbage states.
> How would that happen? The volatile keyword is only a compiler hint not
> to do optimizations on the variable, what actually happens on the CPU
> level depends if the instruction is locked or not, so different threads
> may read different bits.
> You seem to imply that once a variable is not used with volatile
> semantics, even just for read, the result could lead to random writes
> because it's otherwise undefined.

Pretty much; once a variable is read without READ_ONCE, it's unsafe to 
write a new value on another thread that depends on the old value. 
Imagine a compiler which invents stores; then if you are both reading 
and setting a variable 'a' on different threads, the following could happen:

thread 1 (reads)       thread 2 (modifies)

reads a into tmp

stores junk into a

                                 reads junk from a

stores tmp into a

                                 writes junk | 2 to a


Now a contains junk indefinitely.


But if it's too theoretical, I'm happy to drop it and amend my paranoia 
level.


(Thanks for fixing the !CONFIG_PRINTK warning that btrfs_state_to_string 
was unused; sorry I missed it.)


Sweet Tea
Omar Sandoval Feb. 24, 2022, 8:17 p.m. UTC | #5
On Thu, Feb 24, 2022 at 03:09:08PM -0500, Sweet Tea Dorminy wrote:
> > > All the other interactions with info->fs_state are test/set/clear_bit,
> > > which treat the argument as volatile and are therefore safe to do from
> > > multiple threads. Without the READ_ONCE (reading it as a volatile),
> > > the compiler or cpu could turn the reads of info->fs_state in
> > > for_each_set_bit() into writes of random stuff into info->fs_state,
> > > potentially clearing the state bits or filling them with garbage.
> > I'm not sure I'm missing something, but I find the above hard to
> > believe. Concurrent access to a variable from multiple threads may not
> > produce consistent results, but random writes should not happen when
> > we're just reading.
> 
> Maybe I've been reading too many articles about the things compilers are
> technically allowed to do. But as per the following link, the C standard
> does permit compilers inventing writes except to atomics and volatiles:
> https://lwn.net/Articles/793253/#Invented%20Stores
> 
> > 
> > > Even if this is right, it'd be rare, but it would be exceeding weird
> > > for a message to be logged listing an error and then future messages
> > > be logged without any such state, or with a random collection of
> > > garbage states.
> > How would that happen? The volatile keyword is only a compiler hint not
> > to do optimizations on the variable, what actually happens on the CPU
> > level depends if the instruction is locked or not, so different threads
> > may read different bits.
> > You seem to imply that once a variable is not used with volatile
> > semantics, even just for read, the result could lead to random writes
> > because it's otherwise undefined.
> 
> Pretty much; once a variable is read without READ_ONCE, it's unsafe to write
> a new value on another thread that depends on the old value. Imagine a
> compiler which invents stores; then if you are both reading and setting a
> variable 'a' on different threads, the following could happen:
> 
> thread 1 (reads)       thread 2 (modifies)
> 
> reads a into tmp
> 
> stores junk into a
> 
>                                 reads junk from a
> 
> stores tmp into a
> 
>                                 writes junk | 2 to a
> 
> 
> Now a contains junk indefinitely.
> 
> 
> But if it's too theoretical, I'm happy to drop it and amend my paranoia
> level.

I agree with Sweet Tea here. Even if it's very theoretical, it costs us
nothing to do the "correct" thing here.
David Sterba Feb. 28, 2022, 4:48 p.m. UTC | #6
On Thu, Feb 24, 2022 at 03:09:08PM -0500, Sweet Tea Dorminy wrote:
> >> All the other interactions with info->fs_state are test/set/clear_bit,
> >> which treat the argument as volatile and are therefore safe to do from
> >> multiple threads. Without the READ_ONCE (reading it as a volatile),
> >> the compiler or cpu could turn the reads of info->fs_state in
> >> for_each_set_bit() into writes of random stuff into info->fs_state,
> >> potentially clearing the state bits or filling them with garbage.
> > I'm not sure I'm missing something, but I find the above hard to
> > believe. Concurrent access to a variable from multiple threads may not
> > produce consistent results, but random writes should not happen when
> > we're just reading.
> 
> Maybe I've been reading too many articles about the things compilers are 
> technically allowed to do. But as per the following link, the C standard 
> does permit compilers inventing writes except to atomics and volatiles: 
> https://lwn.net/Articles/793253/#Invented%20Stores

There's a difference between technically allowed to do and actually
doing it. The article sections says that no such compiler is known with
some exception of an old one that got fixed. Also as I read the example
of the invented write, it's very specific and I don't se how it applies
here at all.

> >> Even if this is right, it'd be rare, but it would be exceeding weird
> >> for a message to be logged listing an error and then future messages
> >> be logged without any such state, or with a random collection of
> >> garbage states.
> > How would that happen? The volatile keyword is only a compiler hint not
> > to do optimizations on the variable, what actually happens on the CPU
> > level depends if the instruction is locked or not, so different threads
> > may read different bits.
> > You seem to imply that once a variable is not used with volatile
> > semantics, even just for read, the result could lead to random writes
> > because it's otherwise undefined.
> 
> Pretty much; once a variable is read without READ_ONCE, it's unsafe to 
> write a new value on another thread that depends on the old value. 

You're giving 'volatile' too much credit for guaranteeing safety for
interprocess operation, it's just a compiler hint to avoid
optimizations, it's not a synchronization primitive. In some cases
lockless reads are safe, as long as there are no updates or potential
reloads between reads that could be changed by another thread. We use
READ_ONCE in many cases, compiler in most cases generates the same
instructions as without it and it's merely an annotation and a way of
syntactic comment to denote special handling.

> Imagine a compiler which invents stores; then if you are both reading 
> and setting a variable 'a' on different threads, the following could happen:
> 
> thread 1 (reads)       thread 2 (modifies)
> 
> reads a into tmp
> 
> stores junk into a
> 
>                                  reads junk from a
> 
> stores tmp into a
> 
>                                  writes junk | 2 to a
> 
> 
> Now a contains junk indefinitely.

But we're only reading here, that why I don't see any real risk. We read
a variable, potentially changed from other thread, and actually changed
by atomic bit operations. Where are the invented writes going to come
from, can't we rely on the bit handling primitives?

If you're example above would be real, we'd either lack proper locking
or there's a serious problem with compiler that would blow up in many
places (like inventing newly set bits in flags or random values
appearing in regular variables).

> But if it's too theoretical, I'm happy to drop it and amend my paranoia 
> level.

I think it's highly theoretical but the READ_ONCE should be there as a
matter of clean code practice, so I've updated the code.
diff mbox series

Patch

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8992e0096163..3db337cd015a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -148,6 +148,8 @@  enum {
 
 	/* Indicates there was an error cleaning up a log tree. */
 	BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
+
+	BTRFS_FS_STATE_COUNT,
 };
 
 #define BTRFS_BACKREF_REV_MAX		256
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4d947ba32da9..7ef6a3e494d0 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,6 +66,46 @@  static struct file_system_type btrfs_root_fs_type;
 
 static int btrfs_remount(struct super_block *sb, int *flags, char *data);
 
+#define STATE_STRING_PREFACE ": state "
+#define STATE_STRING_BUF_LEN \
+	(sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
+
+/* Characters to print to indicate error conditions. RO is not an error. */
+static const char fs_state_chars[] = {
+	[BTRFS_FS_STATE_ERROR]			= 'E',
+	[BTRFS_FS_STATE_REMOUNTING]		= 'M',
+	[BTRFS_FS_STATE_RO]			= 0,
+	[BTRFS_FS_STATE_TRANS_ABORTED]		= 'A',
+	[BTRFS_FS_STATE_DEV_REPLACING]		= 'P',
+	[BTRFS_FS_STATE_DUMMY_FS_INFO]		= 0,
+	[BTRFS_FS_STATE_NO_CSUMS]		= 0,
+	[BTRFS_FS_STATE_LOG_CLEANUP_ERROR]	= 'L',
+};
+
+static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
+{
+	unsigned int bit;
+	unsigned int states_printed = 0;
+	char *curr = buf;
+
+	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
+	curr += sizeof(STATE_STRING_PREFACE) - 1;
+
+	for_each_set_bit(bit, &info->fs_state, sizeof(info->fs_state)) {
+		WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
+		if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
+			*curr++ = fs_state_chars[bit];
+			states_printed++;
+		}
+	}
+
+	/* If no states were printed, reset the buffer */
+	if (!states_printed)
+		curr = buf;
+
+	*curr++ = '\0';
+}
+
 /*
  * Generally the error codes correspond to their respective errors, but there
  * are a few special cases.
@@ -128,6 +168,7 @@  void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 {
 	struct super_block *sb = fs_info->sb;
 #ifdef CONFIG_PRINTK
+	char statestr[STATE_STRING_BUF_LEN];
 	const char *errstr;
 #endif
 
@@ -140,6 +181,7 @@  void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 
 #ifdef CONFIG_PRINTK
 	errstr = btrfs_decode_error(errno);
+	btrfs_state_to_string(fs_info, statestr);
 	if (fmt) {
 		struct va_format vaf;
 		va_list args;
@@ -148,12 +190,12 @@  void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 		vaf.fmt = fmt;
 		vaf.va = &args;
 
-		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
-			sb->s_id, function, line, errno, errstr, &vaf);
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
+			sb->s_id, statestr, function, line, errno, errstr, &vaf);
 		va_end(args);
 	} else {
-		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
-			sb->s_id, function, line, errno, errstr);
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
+			sb->s_id, statestr, function, line, errno, errstr);
 	}
 #endif
 
@@ -240,11 +282,15 @@  void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
 	vaf.va = &args;
 
 	if (__ratelimit(ratelimit)) {
-		if (fs_info)
-			printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
-				fs_info->sb->s_id, &vaf);
-		else
+		if (fs_info) {
+			char statestr[STATE_STRING_BUF_LEN];
+
+			btrfs_state_to_string(fs_info, statestr);
+			printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
+				fs_info->sb->s_id, statestr, &vaf);
+		} else {
 			printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
+		}
 	}
 
 	va_end(args);