diff mbox

[v3] btrfs: relocation: Enhance kernel error output for relocation

Message ID 20170215013905.17677-1-quwenruo@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Qu Wenruo Feb. 15, 2017, 1:39 a.m. UTC
When balance(relocation) fails, btrfs-progs will report like:

ERROR: error during balancing '/mnt/scratch': Input/output error
There may be more info in syslog - try dmesg | tail

However kernel can't provide may useful info in many cases to locate the
problem.

This patch will add error messages in relocation to help user and
developer to locate the problem.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
v2:
  Fix typo where 'err' and 'ret' are used wrong.
v3:
  Add space between error number and parenthesis of error string
  Fix gramma errors.
---
 fs/btrfs/relocation.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 57 insertions(+), 4 deletions(-)

Comments

Qu Wenruo March 6, 2017, 3:36 a.m. UTC | #1
Any comment on this patch?

Thanks,
Qu

At 02/15/2017 09:39 AM, Qu Wenruo wrote:
> When balance(relocation) fails, btrfs-progs will report like:
>
> ERROR: error during balancing '/mnt/scratch': Input/output error
> There may be more info in syslog - try dmesg | tail
>
> However kernel can't provide may useful info in many cases to locate the
> problem.
>
> This patch will add error messages in relocation to help user and
> developer to locate the problem.
>
> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
> ---
> v2:
>   Fix typo where 'err' and 'ret' are used wrong.
> v3:
>   Add space between error number and parenthesis of error string
>   Fix gramma errors.
> ---
>  fs/btrfs/relocation.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 57 insertions(+), 4 deletions(-)
>
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index 379711048fb0..6de5800e17bc 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -4011,6 +4011,8 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
>  					rc->block_rsv, rc->block_rsv->size,
>  					BTRFS_RESERVE_FLUSH_ALL);
>  		if (ret) {
> +			btrfs_err(fs_info, "failed to reserve space: %d (%s)",
> +				  ret, btrfs_decode_error(ret));
>  			err = ret;
>  			break;
>  		}
> @@ -4019,6 +4021,9 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
>  		if (IS_ERR(trans)) {
>  			err = PTR_ERR(trans);
>  			trans = NULL;
> +			btrfs_err(fs_info,
> +				"failed to start transaction: %d (%s)",
> +				  err, btrfs_decode_error(err));
>  			break;
>  		}
>  restart:
> @@ -4028,8 +4033,12 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
>  		}
>
>  		ret = find_next_extent(rc, path, &key);
> -		if (ret < 0)
> +		if (ret < 0) {
> +			btrfs_err(fs_info,
> +				"failed to find next extent: %d (%s)",
> +				  ret, btrfs_decode_error(ret));
>  			err = ret;
> +		}
>  		if (ret != 0)
>  			break;
>
> @@ -4081,9 +4090,17 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
>
>  		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
>  			ret = add_tree_block(rc, &key, path, &blocks);
> +			if (ret < 0)
> +				btrfs_err(fs_info,
> +					"failed to record tree block: %d (%s)",
> +					  ret, btrfs_decode_error(ret));
>  		} else if (rc->stage == UPDATE_DATA_PTRS &&
>  			   (flags & BTRFS_EXTENT_FLAG_DATA)) {
>  			ret = add_data_references(rc, &key, path, &blocks);
> +			if (ret < 0)
> +				btrfs_err(fs_info,
> +					"failed to record data extent: %d (%s)",
> +					  ret, btrfs_decode_error(ret));
>  		} else {
>  			btrfs_release_path(path);
>  			ret = 0;
> @@ -4103,6 +4120,9 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
>  				rc->backref_cache.last_trans = trans->transid - 1;
>
>  				if (ret != -EAGAIN) {
> +					btrfs_err(fs_info,
> +				"failed to relocate tree blocks: %d (%s)",
> +						  ret, btrfs_decode_error(ret));
>  					err = ret;
>  					break;
>  				}
> @@ -4121,6 +4141,9 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
>  			ret = relocate_data_extent(rc->data_inode,
>  						   &key, &rc->cluster);
>  			if (ret < 0) {
> +				btrfs_err(fs_info,
> +				"failed to relocate data extent: %d (%s)",
> +					  ret, btrfs_decode_error(ret));
>  				err = ret;
>  				break;
>  			}
> @@ -4147,8 +4170,12 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
>  	if (!err) {
>  		ret = relocate_file_extent_cluster(rc->data_inode,
>  						   &rc->cluster);
> -		if (ret < 0)
> +		if (ret < 0) {
> +			btrfs_err(fs_info,
> +			"failed to relocate file extent cluster: %d (%s)",
> +				  ret, btrfs_decode_error(ret));
>  			err = ret;
> +		}
>  	}
>
>  	rc->create_reloc_tree = 0;
> @@ -4158,6 +4185,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
>  	btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1);
>
>  	err = prepare_to_merge(rc, err);
> +	if (err < 0)
> +		btrfs_err(fs_info,
> +		"failed to prepare merging of relocation trees: %d (%s)",
> +			  err, btrfs_decode_error(err));
>
>  	merge_reloc_roots(rc);
>
> @@ -4336,6 +4367,9 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
>
>  	ret = btrfs_inc_block_group_ro(extent_root, rc->block_group);
>  	if (ret) {
> +		btrfs_err(fs_info,
> +			"failed to set block group read-only: %d (%s)",
> +			  ret, btrfs_decode_error(ret));
>  		err = ret;
>  		goto out;
>  	}
> @@ -4351,10 +4385,19 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
>  					path);
>  	btrfs_free_path(path);
>
> -	if (!IS_ERR(inode))
> +	if (!IS_ERR(inode)) {
>  		ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
> -	else
> +		if (ret < 0)
> +			btrfs_err(fs_info,
> +				"failed to delete block group cache: %d (%s)",
> +				  ret, btrfs_decode_error(ret));
> +	} else {
>  		ret = PTR_ERR(inode);
> +		if (ret < 0 && ret != -ENOENT)
> +			btrfs_err(fs_info,
> +				"failed to lookup free space inode: %d (%s)",
> +				  ret, btrfs_decode_error(ret));
> +	}
>
>  	if (ret && ret != -ENOENT) {
>  		err = ret;
> @@ -4364,6 +4407,8 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
>  	rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
>  	if (IS_ERR(rc->data_inode)) {
>  		err = PTR_ERR(rc->data_inode);
> +		btrfs_err(fs_info, "failed to create relocation inode: %d (%s)",
> +			  err, btrfs_decode_error(err));
>  		rc->data_inode = NULL;
>  		goto out;
>  	}
> @@ -4394,6 +4439,9 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
>  			ret = btrfs_wait_ordered_range(rc->data_inode, 0,
>  						       (u64)-1);
>  			if (ret) {
> +				btrfs_err(fs_info,
> +					"failed to wait ordered range: %d (%s)",
> +					  ret, btrfs_decode_error(ret));
>  				err = ret;
>  				goto out;
>  			}
> @@ -4407,6 +4455,11 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
>  	WARN_ON(rc->block_group->reserved > 0);
>  	WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
>  out:
> +	if (err < 0)
> +		btrfs_err(fs_info,
> +			  "failed to relocate block group %llu: %d (%s)",
> +			  rc->block_group->key.objectid, err,
> +			  btrfs_decode_error(err));
>  	if (err && rw)
>  		btrfs_dec_block_group_ro(rc->block_group);
>  	iput(rc->data_inode);
>


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba May 9, 2017, 5:29 p.m. UTC | #2
On Wed, Feb 15, 2017 at 09:39:05AM +0800, Qu Wenruo wrote:
> When balance(relocation) fails, btrfs-progs will report like:
> 
> ERROR: error during balancing '/mnt/scratch': Input/output error
> There may be more info in syslog - try dmesg | tail
> 
> However kernel can't provide may useful info in many cases to locate the
> problem.
> 
> This patch will add error messages in relocation to help user and
> developer to locate the problem.

I think it's too verbose for a user, and not really helpful what to do
after such error message appears in the log. The errors translate name
of the last function that failed, so the user would need to be familiar
with the inner workings of the balance to make sense of it.

The meessages may make sense to a developer, but then it's not necessary
to print them as btrfs_err, but btrfs_debug.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo May 10, 2017, 3:39 a.m. UTC | #3
At 05/10/2017 01:29 AM, David Sterba wrote:
> On Wed, Feb 15, 2017 at 09:39:05AM +0800, Qu Wenruo wrote:
>> When balance(relocation) fails, btrfs-progs will report like:
>>
>> ERROR: error during balancing '/mnt/scratch': Input/output error
>> There may be more info in syslog - try dmesg | tail
>>
>> However kernel can't provide may useful info in many cases to locate the
>> problem.
>>
>> This patch will add error messages in relocation to help user and
>> developer to locate the problem.
> 
> I think it's too verbose for a user, and not really helpful what to do
> after such error message appears in the log. The errors translate name
> of the last function that failed, so the user would need to be familiar
> with the inner workings of the balance to make sense of it.

Yes, normal user may never need such verbose output.

But it will help developers or support guys to wipe out some really easy 
cases.

> 
> The meessages may make sense to a developer, but then it's not necessary
> to print them as btrfs_err, but btrfs_debug.

I also considered btrfs_debug, but the problem is btrfs_debug() depend 
on either CONFIG_DYANMIC_DEBUG or DEBUG.

So when problem happens in real world, we're too late to ensure such output.

Thanks,
Qu



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba May 11, 2017, 6:40 p.m. UTC | #4
On Wed, May 10, 2017 at 11:39:40AM +0800, Qu Wenruo wrote:
> 
> 
> At 05/10/2017 01:29 AM, David Sterba wrote:
> > On Wed, Feb 15, 2017 at 09:39:05AM +0800, Qu Wenruo wrote:
> >> When balance(relocation) fails, btrfs-progs will report like:
> >>
> >> ERROR: error during balancing '/mnt/scratch': Input/output error
> >> There may be more info in syslog - try dmesg | tail
> >>
> >> However kernel can't provide may useful info in many cases to locate the
> >> problem.
> >>
> >> This patch will add error messages in relocation to help user and
> >> developer to locate the problem.
> > 
> > I think it's too verbose for a user, and not really helpful what to do
> > after such error message appears in the log. The errors translate name
> > of the last function that failed, so the user would need to be familiar
> > with the inner workings of the balance to make sense of it.
> 
> Yes, normal user may never need such verbose output.
> 
> But it will help developers or support guys to wipe out some really easy 
> cases.
> 
> > 
> > The meessages may make sense to a developer, but then it's not necessary
> > to print them as btrfs_err, but btrfs_debug.
> 
> I also considered btrfs_debug, but the problem is btrfs_debug() depend 
> on either CONFIG_DYANMIC_DEBUG or DEBUG.
> 
> So when problem happens in real world, we're too late to ensure such output.

The I think we need some way that is not as noisy as btrfs_err but also
compiled-in by default unlike the dynamic debug.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 379711048fb0..6de5800e17bc 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4011,6 +4011,8 @@  static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 					rc->block_rsv, rc->block_rsv->size,
 					BTRFS_RESERVE_FLUSH_ALL);
 		if (ret) {
+			btrfs_err(fs_info, "failed to reserve space: %d (%s)",
+				  ret, btrfs_decode_error(ret));
 			err = ret;
 			break;
 		}
@@ -4019,6 +4021,9 @@  static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 		if (IS_ERR(trans)) {
 			err = PTR_ERR(trans);
 			trans = NULL;
+			btrfs_err(fs_info,
+				"failed to start transaction: %d (%s)",
+				  err, btrfs_decode_error(err));
 			break;
 		}
 restart:
@@ -4028,8 +4033,12 @@  static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 		}
 
 		ret = find_next_extent(rc, path, &key);
-		if (ret < 0)
+		if (ret < 0) {
+			btrfs_err(fs_info,
+				"failed to find next extent: %d (%s)",
+				  ret, btrfs_decode_error(ret));
 			err = ret;
+		}
 		if (ret != 0)
 			break;
 
@@ -4081,9 +4090,17 @@  static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 
 		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 			ret = add_tree_block(rc, &key, path, &blocks);
+			if (ret < 0)
+				btrfs_err(fs_info,
+					"failed to record tree block: %d (%s)",
+					  ret, btrfs_decode_error(ret));
 		} else if (rc->stage == UPDATE_DATA_PTRS &&
 			   (flags & BTRFS_EXTENT_FLAG_DATA)) {
 			ret = add_data_references(rc, &key, path, &blocks);
+			if (ret < 0)
+				btrfs_err(fs_info,
+					"failed to record data extent: %d (%s)",
+					  ret, btrfs_decode_error(ret));
 		} else {
 			btrfs_release_path(path);
 			ret = 0;
@@ -4103,6 +4120,9 @@  static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 				rc->backref_cache.last_trans = trans->transid - 1;
 
 				if (ret != -EAGAIN) {
+					btrfs_err(fs_info,
+				"failed to relocate tree blocks: %d (%s)",
+						  ret, btrfs_decode_error(ret));
 					err = ret;
 					break;
 				}
@@ -4121,6 +4141,9 @@  static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 			ret = relocate_data_extent(rc->data_inode,
 						   &key, &rc->cluster);
 			if (ret < 0) {
+				btrfs_err(fs_info,
+				"failed to relocate data extent: %d (%s)",
+					  ret, btrfs_decode_error(ret));
 				err = ret;
 				break;
 			}
@@ -4147,8 +4170,12 @@  static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 	if (!err) {
 		ret = relocate_file_extent_cluster(rc->data_inode,
 						   &rc->cluster);
-		if (ret < 0)
+		if (ret < 0) {
+			btrfs_err(fs_info,
+			"failed to relocate file extent cluster: %d (%s)",
+				  ret, btrfs_decode_error(ret));
 			err = ret;
+		}
 	}
 
 	rc->create_reloc_tree = 0;
@@ -4158,6 +4185,10 @@  static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 	btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1);
 
 	err = prepare_to_merge(rc, err);
+	if (err < 0)
+		btrfs_err(fs_info,
+		"failed to prepare merging of relocation trees: %d (%s)",
+			  err, btrfs_decode_error(err));
 
 	merge_reloc_roots(rc);
 
@@ -4336,6 +4367,9 @@  int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
 
 	ret = btrfs_inc_block_group_ro(extent_root, rc->block_group);
 	if (ret) {
+		btrfs_err(fs_info,
+			"failed to set block group read-only: %d (%s)",
+			  ret, btrfs_decode_error(ret));
 		err = ret;
 		goto out;
 	}
@@ -4351,10 +4385,19 @@  int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
 					path);
 	btrfs_free_path(path);
 
-	if (!IS_ERR(inode))
+	if (!IS_ERR(inode)) {
 		ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
-	else
+		if (ret < 0)
+			btrfs_err(fs_info,
+				"failed to delete block group cache: %d (%s)",
+				  ret, btrfs_decode_error(ret));
+	} else {
 		ret = PTR_ERR(inode);
+		if (ret < 0 && ret != -ENOENT)
+			btrfs_err(fs_info,
+				"failed to lookup free space inode: %d (%s)",
+				  ret, btrfs_decode_error(ret));
+	}
 
 	if (ret && ret != -ENOENT) {
 		err = ret;
@@ -4364,6 +4407,8 @@  int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
 	rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
 	if (IS_ERR(rc->data_inode)) {
 		err = PTR_ERR(rc->data_inode);
+		btrfs_err(fs_info, "failed to create relocation inode: %d (%s)",
+			  err, btrfs_decode_error(err));
 		rc->data_inode = NULL;
 		goto out;
 	}
@@ -4394,6 +4439,9 @@  int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
 			ret = btrfs_wait_ordered_range(rc->data_inode, 0,
 						       (u64)-1);
 			if (ret) {
+				btrfs_err(fs_info,
+					"failed to wait ordered range: %d (%s)",
+					  ret, btrfs_decode_error(ret));
 				err = ret;
 				goto out;
 			}
@@ -4407,6 +4455,11 @@  int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
 	WARN_ON(rc->block_group->reserved > 0);
 	WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
 out:
+	if (err < 0)
+		btrfs_err(fs_info,
+			  "failed to relocate block group %llu: %d (%s)",
+			  rc->block_group->key.objectid, err,
+			  btrfs_decode_error(err));
 	if (err && rw)
 		btrfs_dec_block_group_ro(rc->block_group);
 	iput(rc->data_inode);