diff mbox series

btrfs: add fs state details to error messages.

Message ID 20220212191042.94954-1-sweettea-kernel@dorminy.me (mailing list archive)
State New, archived
Headers show
Series btrfs: add fs state details to error messages. | expand

Commit Message

Sweet Tea Dorminy Feb. 12, 2022, 7:10 p.m. UTC
When a filesystem goes read-only due to an error, multiple errors tend
to be reported, some of which are knock-on failures. Logging some
fs_states, if any, in btrfs_handle_fs_error() and btrfs_printk()
helps distinguish the first error from subsequent messages which may
only exist due to an error state.

Under the new format, most initial errors will look like:
`BTRFS: error (device loop0) in ...`
while subsequent errors will begin with:
`error (device loop0: state E) in ...`

An initial transaction abort error will look like
`error (device loop0: state X) in ...`
and subsequent messages will contain
`(device loop0: state EX) in ...`

Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
---
 fs/btrfs/super.c | 49 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 9 deletions(-)

Comments

David Sterba Feb. 15, 2022, 3:04 p.m. UTC | #1
On Sat, Feb 12, 2022 at 02:10:42PM -0500, Sweet Tea Dorminy wrote:
> When a filesystem goes read-only due to an error, multiple errors tend
> to be reported, some of which are knock-on failures. Logging some
> fs_states, if any, in btrfs_handle_fs_error() and btrfs_printk()
> helps distinguish the first error from subsequent messages which may
> only exist due to an error state.
> 
> Under the new format, most initial errors will look like:
> `BTRFS: error (device loop0) in ...`
> while subsequent errors will begin with:
> `error (device loop0: state E) in ...`
> 
> An initial transaction abort error will look like
> `error (device loop0: state X) in ...`
> and subsequent messages will contain
> `(device loop0: state EX) in ...`
> 
> Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
> ---
>  fs/btrfs/super.c | 49 +++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 40 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 33cfc9e27451..d0e81eb48eac 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -66,6 +66,31 @@ static struct file_system_type btrfs_root_fs_type;
>  
>  static int btrfs_remount(struct super_block *sb, int *flags, char *data);
>  
> +#define STATE_STRING_PREFACE ": state "
> +#define MAX_STATE_CHARS 2
> +
> +static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
> +{
> +	unsigned long state = info->fs_state;
> +	char *curr = buf;
> +
> +	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
> +	curr += sizeof(STATE_STRING_PREFACE) - 1;
> +
> +	/* If more states are reported, update MAX_STATE_CHARS also */
> +	if (test_and_clear_bit(BTRFS_FS_STATE_ERROR, &state))

The state is supposed to be sticky, so can't be cleared. Also as I read
the suggested change, the "state: " should be visible for all messages
that are printed after the filesystem state changes.

> +		*curr++ = 'E';
> +
> +	if (test_and_clear_bit(BTRFS_FS_STATE_TRANS_ABORTED, &state))
> +		*curr++ = 'X';
> +
> +	/* If no states were printed, reset the buffer */
> +	if (state == info->fs_state)
> +		curr = buf;
> +
> +	*curr++ = '\0';
> +}
> +
>  /*
>   * Generally the error codes correspond to their respective errors, but there
>   * are a few special cases.
> @@ -128,6 +153,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
>  {
>  	struct super_block *sb = fs_info->sb;
>  #ifdef CONFIG_PRINTK
> +	char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS];
>  	const char *errstr;
>  #endif
>  
> @@ -136,10 +162,11 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
>  	 * under SB_RDONLY, then it is safe here.
>  	 */
>  	if (errno == -EROFS && sb_rdonly(sb))
> -  		return;
> +		return;

Unnecessary change.

>  
>  #ifdef CONFIG_PRINTK
>  	errstr = btrfs_decode_error(errno);
> +	btrfs_state_to_string(fs_info, statestr);
>  	if (fmt) {
>  		struct va_format vaf;
>  		va_list args;
> @@ -148,12 +175,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
>  		vaf.fmt = fmt;
>  		vaf.va = &args;
>  
> -		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
> -			sb->s_id, function, line, errno, errstr, &vaf);
> +		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
> +			sb->s_id, statestr, function, line, errno, errstr, &vaf);

Alternatively the state message can be built into the message itself so
it does not require the extra array.


		pr_crit("btrfs: error(device %s%s%s%s) ...",
			sb->s_id,
			statebits ? ", state" : "",
			test_bit(FS_ERRROR) ? "E" : "",
			test_bit(TRANS_ABORT) ? "T" : "", ...);

This is the idea, final code can use some wrappers around the bit
constatnts.


>  		va_end(args);
>  	} else {
> -		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
> -			sb->s_id, function, line, errno, errstr);
> +		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
> +			sb->s_id, statestr, function, line, errno, errstr);

Filling the temporary array makes sense as it's used twice, however I'm
not sure if the 'else' branch is ever executed.
Sweet Tea Dorminy Feb. 15, 2022, 4:07 p.m. UTC | #2
>> +static void btrfs_state_to_string(const struct btrfs_fs_info *info, 
>> char *buf)
>> +{
>> +	unsigned long state = info->fs_state;
>> +	char *curr = buf;
>> +
>> +	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
>> +	curr += sizeof(STATE_STRING_PREFACE) - 1;
>> +
>> +	/* If more states are reported, update MAX_STATE_CHARS also */
>> +	if (test_and_clear_bit(BTRFS_FS_STATE_ERROR, &state))
> 
> The state is supposed to be sticky, so can't be cleared. Also as I read
> the suggested change, the "state: " should be visible for all messages
> that are printed after the filesystem state changes.

'state' is a local copy of info->fs_state, so clearing bits on the local 
copy should be okay, and the "state: " will be present for everything 
after the fs state changes. Could instead use test_bit(&info->fs_state) 
and keep a count of how many states were printed (to know if we need to 
reset the buffer) if that is clearer.
> 
>> +		*curr++ = 'E';
>> +
>> +	if (test_and_clear_bit(BTRFS_FS_STATE_TRANS_ABORTED, &state))
>> +		*curr++ = 'X';
>> +
>> +	/* If no states were printed, reset the buffer */
>> +	if (state == info->fs_state)
>> +		curr = buf;
>> +
>> +	*curr++ = '\0';
>> +}
>> +
>>  /*
>>   * Generally the error codes correspond to their respective errors, 
>> but there
>>   * are a few special cases.
>> @@ -128,6 +153,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info 
>> *fs_info, const char *function
>>  {
>>  	struct super_block *sb = fs_info->sb;
>>  #ifdef CONFIG_PRINTK
>> +	char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS];
>>  	const char *errstr;
>>  #endif
>> 
>> @@ -136,10 +162,11 @@ void __btrfs_handle_fs_error(struct 
>> btrfs_fs_info *fs_info, const char *function
>>  	 * under SB_RDONLY, then it is safe here.
>>  	 */
>>  	if (errno == -EROFS && sb_rdonly(sb))
>> -  		return;
>> +		return;
> 
> Unnecessary change.
Yeah, there's a stray space at the start of that line, but I can take 
out fixing it.
> 
>> 
>>  #ifdef CONFIG_PRINTK
>>  	errstr = btrfs_decode_error(errno);
>> +	btrfs_state_to_string(fs_info, statestr);
>>  	if (fmt) {
>>  		struct va_format vaf;
>>  		va_list args;
>> @@ -148,12 +175,12 @@ void __btrfs_handle_fs_error(struct 
>> btrfs_fs_info *fs_info, const char *function
>>  		vaf.fmt = fmt;
>>  		vaf.va = &args;
>> 
>> -		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
>> -			sb->s_id, function, line, errno, errstr, &vaf);
>> +		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
>> +			sb->s_id, statestr, function, line, errno, errstr, &vaf);
> 
> Alternatively the state message can be built into the message itself so
> it does not require the extra array.
> 
> 
> 		pr_crit("btrfs: error(device %s%s%s%s) ...",
> 			sb->s_id,
> 			statebits ? ", state" : "",
> 			test_bit(FS_ERRROR) ? "E" : "",
> 			test_bit(TRANS_ABORT) ? "T" : "", ...);
> 
> This is the idea, final code can use some wrappers around the bit
> constatnts.
> 
> 
>>  		va_end(args);
>>  	} else {
>> -		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
>> -			sb->s_id, function, line, errno, errstr);
>> +		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
>> +			sb->s_id, statestr, function, line, errno, errstr);
> 
> Filling the temporary array makes sense as it's used twice, however I'm
> not sure if the 'else' branch is ever executed.
There are a bunch of calls with NULL format -> else branch, 
unfortunately.

Thanks!
diff mbox series

Patch

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 33cfc9e27451..d0e81eb48eac 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,6 +66,31 @@  static struct file_system_type btrfs_root_fs_type;
 
 static int btrfs_remount(struct super_block *sb, int *flags, char *data);
 
+#define STATE_STRING_PREFACE ": state "
+#define MAX_STATE_CHARS 2
+
+static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
+{
+	unsigned long state = info->fs_state;
+	char *curr = buf;
+
+	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
+	curr += sizeof(STATE_STRING_PREFACE) - 1;
+
+	/* If more states are reported, update MAX_STATE_CHARS also */
+	if (test_and_clear_bit(BTRFS_FS_STATE_ERROR, &state))
+		*curr++ = 'E';
+
+	if (test_and_clear_bit(BTRFS_FS_STATE_TRANS_ABORTED, &state))
+		*curr++ = 'X';
+
+	/* If no states were printed, reset the buffer */
+	if (state == info->fs_state)
+		curr = buf;
+
+	*curr++ = '\0';
+}
+
 /*
  * Generally the error codes correspond to their respective errors, but there
  * are a few special cases.
@@ -128,6 +153,7 @@  void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 {
 	struct super_block *sb = fs_info->sb;
 #ifdef CONFIG_PRINTK
+	char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS];
 	const char *errstr;
 #endif
 
@@ -136,10 +162,11 @@  void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 	 * under SB_RDONLY, then it is safe here.
 	 */
 	if (errno == -EROFS && sb_rdonly(sb))
-  		return;
+		return;
 
 #ifdef CONFIG_PRINTK
 	errstr = btrfs_decode_error(errno);
+	btrfs_state_to_string(fs_info, statestr);
 	if (fmt) {
 		struct va_format vaf;
 		va_list args;
@@ -148,12 +175,12 @@  void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 		vaf.fmt = fmt;
 		vaf.va = &args;
 
-		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
-			sb->s_id, function, line, errno, errstr, &vaf);
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
+			sb->s_id, statestr, function, line, errno, errstr, &vaf);
 		va_end(args);
 	} else {
-		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
-			sb->s_id, function, line, errno, errstr);
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
+			sb->s_id, statestr, function, line, errno, errstr);
 	}
 #endif
 
@@ -240,11 +267,15 @@  void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
 	vaf.va = &args;
 
 	if (__ratelimit(ratelimit)) {
-		if (fs_info)
-			printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
-				fs_info->sb->s_id, &vaf);
-		else
+		if (fs_info) {
+			char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS];
+
+			btrfs_state_to_string(fs_info, statestr);
+			printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
+				fs_info->sb->s_id, statestr, &vaf);
+		} else {
 			printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
+		}
 	}
 
 	va_end(args);