diff mbox series

btrfs: output extra info when delayed inode update failed

Message ID aee666e094ffa89d5d4f6bc733230ae60ee3e6d8.1696573282.git.wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: output extra info when delayed inode update failed | expand

Commit Message

Qu Wenruo Oct. 6, 2023, 6:22 a.m. UTC
There are at least 2 bug reports internally showing transaction abort
due to -ENOENT, from the btrfs_abort_transaction() call inside
__btrfs_run_delayed_items().

For now I don't have a concrete idea on why, but strongly it's the
following call chain causing the problem:

__btrfs_commit_inode_delayed_items()
`- btrfs_update_delayed_inode()
   `- __btrfs_update_delayed_inode()
      `- btrfs_lookup_inode()

This patch would add extra debug for the involved call chain, with
possible leaf dump if needed.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
I strongly assume it's the the case described above, in that case the
fix would just follow btrfs_delete_delayed_inode() to exit early if no
inode item can be found, and not return -ENOENT.

In that case, some debug here can also be removed as they would be too
noisy for regular operations.
---
 fs/btrfs/delayed-inode.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

Comments

David Sterba Oct. 6, 2023, 2:36 p.m. UTC | #1
On Fri, Oct 06, 2023 at 04:52:09PM +1030, Qu Wenruo wrote:
> There are at least 2 bug reports internally showing transaction abort

Please add links to the reports

> due to -ENOENT, from the btrfs_abort_transaction() call inside
> __btrfs_run_delayed_items().
> 
> For now I don't have a concrete idea on why, but strongly it's the
> following call chain causing the problem:
> 
> __btrfs_commit_inode_delayed_items()
> `- btrfs_update_delayed_inode()
>    `- __btrfs_update_delayed_inode()
>       `- btrfs_lookup_inode()
> 
> This patch would add extra debug for the involved call chain, with
> possible leaf dump if needed.
> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
> I strongly assume it's the the case described above, in that case the
> fix would just follow btrfs_delete_delayed_inode() to exit early if no
> inode item can be found, and not return -ENOENT.
> 
> In that case, some debug here can also be removed as they would be too
> noisy for regular operations.

This is under the -- marker but somehow feels like it should be in the
changelog to as it adds more context, you can add it as a Note: to make
it separate from the regular changelog.

> ---
>  fs/btrfs/delayed-inode.c | 30 ++++++++++++++++++++++++++----
>  1 file changed, 26 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
> index 35d7616615c1..1b05c7a818ec 100644
> --- a/fs/btrfs/delayed-inode.c
> +++ b/fs/btrfs/delayed-inode.c
> @@ -19,6 +19,7 @@
>  #include "space-info.h"
>  #include "accessors.h"
>  #include "file-item.h"
> +#include "print-tree.h"
>  
>  #define BTRFS_DELAYED_WRITEBACK		512
>  #define BTRFS_DELAYED_BACKGROUND	128
> @@ -1021,10 +1022,16 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
>  		mod = 1;
>  
>  	ret = btrfs_lookup_inode(trans, root, path, &key, mod);
> -	if (ret > 0)
> +	if (ret > 0) {
> +		btrfs_print_leaf(path->nodes[0]);
>  		ret = -ENOENT;
> -	if (ret < 0)
> +	}
> +	if (ret < 0) {
> +		btrfs_err(fs_info,
> +			"failed to locate inode item for root %lld ino %lld: %d",
> +			root->root_key.objectid, node->inode_id, ret);
>  		goto out;
> +	}
>  
>  	leaf = path->nodes[0];
>  	inode_item = btrfs_item_ptr(leaf, path->slots[0],
> @@ -1054,6 +1061,12 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
>  	 * in the same item doesn't exist.
>  	 */
>  	ret = btrfs_del_item(trans, root, path);
> +	if (ret < 0) {
> +		btrfs_print_leaf(path->nodes[0]);
> +		btrfs_err(fs_info,
> +		"failed to delete inode ref item, key (%llu %u %llu): %d",
> +			  key.objectid, key.type, key.offset, ret);

It looks like a return or goto error is missing here, if this is
supposed to continue without that then please add a comment like
/* Fallthrough */.

> +	}
>  out:
>  	btrfs_release_delayed_iref(node);
>  	btrfs_release_path(path);
> @@ -1114,14 +1127,23 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
>  	int ret;
>  
>  	ret = btrfs_insert_delayed_items(trans, path, node->root, node);
> -	if (ret)
> +	if (ret) {
> +		btrfs_err(trans->fs_info, "failedd to insert delayed items: %d",
                                           ^^^^^^^

> +			  ret);
>  		return ret;
> +	}
>  
>  	ret = btrfs_delete_delayed_items(trans, path, node->root, node);
> -	if (ret)
> +	if (ret) {
> +		btrfs_err(trans->fs_info, "failedd to delete delayed items: %d",
                                           ^^^^^^^
> +			  ret);
>  		return ret;
> +	}
>  
>  	ret = btrfs_update_delayed_inode(trans, node->root, path, node);
> +	if (ret)
> +		btrfs_err(trans->fs_info, "failedd to update delayed items: %d",
                                           ^^^^^^^

typos

> +			  ret);
>  	return ret;
>  }
>  
> -- 
> 2.42.0
Qu Wenruo Oct. 12, 2023, 3:07 a.m. UTC | #2
On 2023/10/7 01:06, David Sterba wrote:
> On Fri, Oct 06, 2023 at 04:52:09PM +1030, Qu Wenruo wrote:
>> There are at least 2 bug reports internally showing transaction abort
>
> Please add links to the reports

Unfortunately all internal suse bugzillas.

>
>> due to -ENOENT, from the btrfs_abort_transaction() call inside
>> __btrfs_run_delayed_items().
>>
>> For now I don't have a concrete idea on why, but strongly it's the
>> following call chain causing the problem:
>>
>> __btrfs_commit_inode_delayed_items()
>> `- btrfs_update_delayed_inode()
>>     `- __btrfs_update_delayed_inode()
>>        `- btrfs_lookup_inode()
>>
>> This patch would add extra debug for the involved call chain, with
>> possible leaf dump if needed.
>>
>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>> ---
>> I strongly assume it's the the case described above, in that case the
>> fix would just follow btrfs_delete_delayed_inode() to exit early if no
>> inode item can be found, and not return -ENOENT.
>>
>> In that case, some debug here can also be removed as they would be too
>> noisy for regular operations.
>
> This is under the -- marker but somehow feels like it should be in the
> changelog to as it adds more context, you can add it as a Note: to make
> it separate from the regular changelog.

I'd prefer this to be hidden from the change log.
Although this indeed makes the whole patch more like an RFC.

And unfortunately I didn't hear the feedback from the reporter yet, thus
still not 100% sure if the above fix is correct.

>
>> ---
>>   fs/btrfs/delayed-inode.c | 30 ++++++++++++++++++++++++++----
>>   1 file changed, 26 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
>> index 35d7616615c1..1b05c7a818ec 100644
>> --- a/fs/btrfs/delayed-inode.c
>> +++ b/fs/btrfs/delayed-inode.c
>> @@ -19,6 +19,7 @@
>>   #include "space-info.h"
>>   #include "accessors.h"
>>   #include "file-item.h"
>> +#include "print-tree.h"
>>
>>   #define BTRFS_DELAYED_WRITEBACK		512
>>   #define BTRFS_DELAYED_BACKGROUND	128
>> @@ -1021,10 +1022,16 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
>>   		mod = 1;
>>
>>   	ret = btrfs_lookup_inode(trans, root, path, &key, mod);
>> -	if (ret > 0)
>> +	if (ret > 0) {
>> +		btrfs_print_leaf(path->nodes[0]);
>>   		ret = -ENOENT;
>> -	if (ret < 0)
>> +	}
>> +	if (ret < 0) {
>> +		btrfs_err(fs_info,
>> +			"failed to locate inode item for root %lld ino %lld: %d",
>> +			root->root_key.objectid, node->inode_id, ret);
>>   		goto out;
>> +	}
>>
>>   	leaf = path->nodes[0];
>>   	inode_item = btrfs_item_ptr(leaf, path->slots[0],
>> @@ -1054,6 +1061,12 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
>>   	 * in the same item doesn't exist.
>>   	 */
>>   	ret = btrfs_del_item(trans, root, path);
>> +	if (ret < 0) {
>> +		btrfs_print_leaf(path->nodes[0]);
>> +		btrfs_err(fs_info,
>> +		"failed to delete inode ref item, key (%llu %u %llu): %d",
>> +			  key.objectid, key.type, key.offset, ret);
>
> It looks like a return or goto error is missing here, if this is
> supposed to continue without that then please add a comment like
> /* Fallthrough */.

Would add the falthrough comment.

Thanks,
Qu
>
>> +	}
>>   out:
>>   	btrfs_release_delayed_iref(node);
>>   	btrfs_release_path(path);
>> @@ -1114,14 +1127,23 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
>>   	int ret;
>>
>>   	ret = btrfs_insert_delayed_items(trans, path, node->root, node);
>> -	if (ret)
>> +	if (ret) {
>> +		btrfs_err(trans->fs_info, "failedd to insert delayed items: %d",
>                                             ^^^^^^^
>
>> +			  ret);
>>   		return ret;
>> +	}
>>
>>   	ret = btrfs_delete_delayed_items(trans, path, node->root, node);
>> -	if (ret)
>> +	if (ret) {
>> +		btrfs_err(trans->fs_info, "failedd to delete delayed items: %d",
>                                             ^^^^^^^
>> +			  ret);
>>   		return ret;
>> +	}
>>
>>   	ret = btrfs_update_delayed_inode(trans, node->root, path, node);
>> +	if (ret)
>> +		btrfs_err(trans->fs_info, "failedd to update delayed items: %d",
>                                             ^^^^^^^
>
> typos
>
>> +			  ret);
>>   	return ret;
>>   }
>>
>> --
>> 2.42.0
diff mbox series

Patch

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 35d7616615c1..1b05c7a818ec 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -19,6 +19,7 @@ 
 #include "space-info.h"
 #include "accessors.h"
 #include "file-item.h"
+#include "print-tree.h"
 
 #define BTRFS_DELAYED_WRITEBACK		512
 #define BTRFS_DELAYED_BACKGROUND	128
@@ -1021,10 +1022,16 @@  static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
 		mod = 1;
 
 	ret = btrfs_lookup_inode(trans, root, path, &key, mod);
-	if (ret > 0)
+	if (ret > 0) {
+		btrfs_print_leaf(path->nodes[0]);
 		ret = -ENOENT;
-	if (ret < 0)
+	}
+	if (ret < 0) {
+		btrfs_err(fs_info,
+			"failed to locate inode item for root %lld ino %lld: %d",
+			root->root_key.objectid, node->inode_id, ret);
 		goto out;
+	}
 
 	leaf = path->nodes[0];
 	inode_item = btrfs_item_ptr(leaf, path->slots[0],
@@ -1054,6 +1061,12 @@  static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
 	 * in the same item doesn't exist.
 	 */
 	ret = btrfs_del_item(trans, root, path);
+	if (ret < 0) {
+		btrfs_print_leaf(path->nodes[0]);
+		btrfs_err(fs_info,
+		"failed to delete inode ref item, key (%llu %u %llu): %d",
+			  key.objectid, key.type, key.offset, ret);
+	}
 out:
 	btrfs_release_delayed_iref(node);
 	btrfs_release_path(path);
@@ -1114,14 +1127,23 @@  __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
 	int ret;
 
 	ret = btrfs_insert_delayed_items(trans, path, node->root, node);
-	if (ret)
+	if (ret) {
+		btrfs_err(trans->fs_info, "failedd to insert delayed items: %d",
+			  ret);
 		return ret;
+	}
 
 	ret = btrfs_delete_delayed_items(trans, path, node->root, node);
-	if (ret)
+	if (ret) {
+		btrfs_err(trans->fs_info, "failedd to delete delayed items: %d",
+			  ret);
 		return ret;
+	}
 
 	ret = btrfs_update_delayed_inode(trans, node->root, path, node);
+	if (ret)
+		btrfs_err(trans->fs_info, "failedd to update delayed items: %d",
+			  ret);
 	return ret;
 }