Message ID | 20220212191042.94954-1-sweettea-kernel@dorminy.me (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: add fs state details to error messages. | expand |
On Sat, Feb 12, 2022 at 02:10:42PM -0500, Sweet Tea Dorminy wrote: > When a filesystem goes read-only due to an error, multiple errors tend > to be reported, some of which are knock-on failures. Logging some > fs_states, if any, in btrfs_handle_fs_error() and btrfs_printk() > helps distinguish the first error from subsequent messages which may > only exist due to an error state. > > Under the new format, most initial errors will look like: > `BTRFS: error (device loop0) in ...` > while subsequent errors will begin with: > `error (device loop0: state E) in ...` > > An initial transaction abort error will look like > `error (device loop0: state X) in ...` > and subsequent messages will contain > `(device loop0: state EX) in ...` > > Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me> > --- > fs/btrfs/super.c | 49 +++++++++++++++++++++++++++++++++++++++--------- > 1 file changed, 40 insertions(+), 9 deletions(-) > > diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c > index 33cfc9e27451..d0e81eb48eac 100644 > --- a/fs/btrfs/super.c > +++ b/fs/btrfs/super.c > @@ -66,6 +66,31 @@ static struct file_system_type btrfs_root_fs_type; > > static int btrfs_remount(struct super_block *sb, int *flags, char *data); > > +#define STATE_STRING_PREFACE ": state " > +#define MAX_STATE_CHARS 2 > + > +static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf) > +{ > + unsigned long state = info->fs_state; > + char *curr = buf; > + > + memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE)); > + curr += sizeof(STATE_STRING_PREFACE) - 1; > + > + /* If more states are reported, update MAX_STATE_CHARS also */ > + if (test_and_clear_bit(BTRFS_FS_STATE_ERROR, &state)) The state is supposed to be sticky, so can't be cleared. Also as I read the suggested change, the "state: " should be visible for all messages that are printed after the filesystem state changes. > + *curr++ = 'E'; > + > + if (test_and_clear_bit(BTRFS_FS_STATE_TRANS_ABORTED, &state)) > + *curr++ = 'X'; > + > + /* If no states were printed, reset the buffer */ > + if (state == info->fs_state) > + curr = buf; > + > + *curr++ = '\0'; > +} > + > /* > * Generally the error codes correspond to their respective errors, but there > * are a few special cases. > @@ -128,6 +153,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function > { > struct super_block *sb = fs_info->sb; > #ifdef CONFIG_PRINTK > + char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS]; > const char *errstr; > #endif > > @@ -136,10 +162,11 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function > * under SB_RDONLY, then it is safe here. > */ > if (errno == -EROFS && sb_rdonly(sb)) > - return; > + return; Unnecessary change. > > #ifdef CONFIG_PRINTK > errstr = btrfs_decode_error(errno); > + btrfs_state_to_string(fs_info, statestr); > if (fmt) { > struct va_format vaf; > va_list args; > @@ -148,12 +175,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function > vaf.fmt = fmt; > vaf.va = &args; > > - pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n", > - sb->s_id, function, line, errno, errstr, &vaf); > + pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n", > + sb->s_id, statestr, function, line, errno, errstr, &vaf); Alternatively the state message can be built into the message itself so it does not require the extra array. pr_crit("btrfs: error(device %s%s%s%s) ...", sb->s_id, statebits ? ", state" : "", test_bit(FS_ERRROR) ? "E" : "", test_bit(TRANS_ABORT) ? "T" : "", ...); This is the idea, final code can use some wrappers around the bit constatnts. > va_end(args); > } else { > - pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n", > - sb->s_id, function, line, errno, errstr); > + pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n", > + sb->s_id, statestr, function, line, errno, errstr); Filling the temporary array makes sense as it's used twice, however I'm not sure if the 'else' branch is ever executed.
>> +static void btrfs_state_to_string(const struct btrfs_fs_info *info, >> char *buf) >> +{ >> + unsigned long state = info->fs_state; >> + char *curr = buf; >> + >> + memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE)); >> + curr += sizeof(STATE_STRING_PREFACE) - 1; >> + >> + /* If more states are reported, update MAX_STATE_CHARS also */ >> + if (test_and_clear_bit(BTRFS_FS_STATE_ERROR, &state)) > > The state is supposed to be sticky, so can't be cleared. Also as I read > the suggested change, the "state: " should be visible for all messages > that are printed after the filesystem state changes. 'state' is a local copy of info->fs_state, so clearing bits on the local copy should be okay, and the "state: " will be present for everything after the fs state changes. Could instead use test_bit(&info->fs_state) and keep a count of how many states were printed (to know if we need to reset the buffer) if that is clearer. > >> + *curr++ = 'E'; >> + >> + if (test_and_clear_bit(BTRFS_FS_STATE_TRANS_ABORTED, &state)) >> + *curr++ = 'X'; >> + >> + /* If no states were printed, reset the buffer */ >> + if (state == info->fs_state) >> + curr = buf; >> + >> + *curr++ = '\0'; >> +} >> + >> /* >> * Generally the error codes correspond to their respective errors, >> but there >> * are a few special cases. >> @@ -128,6 +153,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info >> *fs_info, const char *function >> { >> struct super_block *sb = fs_info->sb; >> #ifdef CONFIG_PRINTK >> + char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS]; >> const char *errstr; >> #endif >> >> @@ -136,10 +162,11 @@ void __btrfs_handle_fs_error(struct >> btrfs_fs_info *fs_info, const char *function >> * under SB_RDONLY, then it is safe here. >> */ >> if (errno == -EROFS && sb_rdonly(sb)) >> - return; >> + return; > > Unnecessary change. Yeah, there's a stray space at the start of that line, but I can take out fixing it. > >> >> #ifdef CONFIG_PRINTK >> errstr = btrfs_decode_error(errno); >> + btrfs_state_to_string(fs_info, statestr); >> if (fmt) { >> struct va_format vaf; >> va_list args; >> @@ -148,12 +175,12 @@ void __btrfs_handle_fs_error(struct >> btrfs_fs_info *fs_info, const char *function >> vaf.fmt = fmt; >> vaf.va = &args; >> >> - pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n", >> - sb->s_id, function, line, errno, errstr, &vaf); >> + pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n", >> + sb->s_id, statestr, function, line, errno, errstr, &vaf); > > Alternatively the state message can be built into the message itself so > it does not require the extra array. > > > pr_crit("btrfs: error(device %s%s%s%s) ...", > sb->s_id, > statebits ? ", state" : "", > test_bit(FS_ERRROR) ? "E" : "", > test_bit(TRANS_ABORT) ? "T" : "", ...); > > This is the idea, final code can use some wrappers around the bit > constatnts. > > >> va_end(args); >> } else { >> - pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n", >> - sb->s_id, function, line, errno, errstr); >> + pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n", >> + sb->s_id, statestr, function, line, errno, errstr); > > Filling the temporary array makes sense as it's used twice, however I'm > not sure if the 'else' branch is ever executed. There are a bunch of calls with NULL format -> else branch, unfortunately. Thanks!
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 33cfc9e27451..d0e81eb48eac 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -66,6 +66,31 @@ static struct file_system_type btrfs_root_fs_type; static int btrfs_remount(struct super_block *sb, int *flags, char *data); +#define STATE_STRING_PREFACE ": state " +#define MAX_STATE_CHARS 2 + +static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf) +{ + unsigned long state = info->fs_state; + char *curr = buf; + + memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE)); + curr += sizeof(STATE_STRING_PREFACE) - 1; + + /* If more states are reported, update MAX_STATE_CHARS also */ + if (test_and_clear_bit(BTRFS_FS_STATE_ERROR, &state)) + *curr++ = 'E'; + + if (test_and_clear_bit(BTRFS_FS_STATE_TRANS_ABORTED, &state)) + *curr++ = 'X'; + + /* If no states were printed, reset the buffer */ + if (state == info->fs_state) + curr = buf; + + *curr++ = '\0'; +} + /* * Generally the error codes correspond to their respective errors, but there * are a few special cases. @@ -128,6 +153,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function { struct super_block *sb = fs_info->sb; #ifdef CONFIG_PRINTK + char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS]; const char *errstr; #endif @@ -136,10 +162,11 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function * under SB_RDONLY, then it is safe here. */ if (errno == -EROFS && sb_rdonly(sb)) - return; + return; #ifdef CONFIG_PRINTK errstr = btrfs_decode_error(errno); + btrfs_state_to_string(fs_info, statestr); if (fmt) { struct va_format vaf; va_list args; @@ -148,12 +175,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function vaf.fmt = fmt; vaf.va = &args; - pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n", - sb->s_id, function, line, errno, errstr, &vaf); + pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n", + sb->s_id, statestr, function, line, errno, errstr, &vaf); va_end(args); } else { - pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n", - sb->s_id, function, line, errno, errstr); + pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n", + sb->s_id, statestr, function, line, errno, errstr); } #endif @@ -240,11 +267,15 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, . vaf.va = &args; if (__ratelimit(ratelimit)) { - if (fs_info) - printk("%sBTRFS %s (device %s): %pV\n", lvl, type, - fs_info->sb->s_id, &vaf); - else + if (fs_info) { + char statestr[sizeof(STATE_STRING_PREFACE) + MAX_STATE_CHARS]; + + btrfs_state_to_string(fs_info, statestr); + printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type, + fs_info->sb->s_id, statestr, &vaf); + } else { printk("%sBTRFS %s: %pV\n", lvl, type, &vaf); + } } va_end(args);
When a filesystem goes read-only due to an error, multiple errors tend to be reported, some of which are knock-on failures. Logging some fs_states, if any, in btrfs_handle_fs_error() and btrfs_printk() helps distinguish the first error from subsequent messages which may only exist due to an error state. Under the new format, most initial errors will look like: `BTRFS: error (device loop0) in ...` while subsequent errors will begin with: `error (device loop0: state E) in ...` An initial transaction abort error will look like `error (device loop0: state X) in ...` and subsequent messages will contain `(device loop0: state EX) in ...` Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me> --- fs/btrfs/super.c | 49 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 9 deletions(-)