diff mbox series

[2/2] btrfs: fix qgroup rsv leaks in cow_file_range

Message ID 81a472c49ed24d85ed3c164ac46b8d4e6cc9d1e1.1721775142.git.boris@bur.io (mailing list archive)
State New, archived
Headers show
Series btrfs: fixes for buffered write qgroup rsv leaks | expand

Commit Message

Boris Burkov July 23, 2024, 10:55 p.m. UTC
In the buffered write path, the dirty page owns the qgroup rsv until it
creates an ordered_extent.

Therefore, any errors that occur before the ordered_extent is created
must free that reservation, or else the space is leaked. The fstest
generic/475 exercises various IO error paths, and is able to trigger
errors in cow_file_range where we fail to get to allocating the ordered
extent. Note that because we *do* clear delalloc, we are likely to
remove the inode from the delalloc list, so the inodes/pages do not have
invalidate/launder called on them in the commit abort path.

This results in failures at the unmount stage of the test that look like:

[ 1903.401193] BTRFS: error (device dm-8 state EA) in
cleanup_transaction:2018: errno=-5 IO failure
[ 1903.402686] BTRFS: error (device dm-8 state EA) in
btrfs_replace_file_extents:2416: errno=-5 IO failure
[ 1903.446415] BTRFS warning (device dm-8 state EA): qgroup 0/5 has
unreleased space, type 0 rsv 28672
[ 1903.447887] ------------[ cut here ]------------
[ 1903.448645] WARNING: CPU: 3 PID: 22588 at fs/btrfs/disk-io.c:4333
close_ctree+0x222/0x4d0 [btrfs]
[ 1903.450130] Modules linked in: btrfs blake2b_generic libcrc32c xor
zstd_compress raid6_pq
[ 1903.451408] CPU: 3 PID: 22588 Comm: umount Kdump: loaded Tainted: G
W          6.10.0-rc7-gab56fde445b8 #21
[ 1903.453058] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS Arch Linux 1.16.3-1-1 04/01/2014
[ 1903.454542] RIP: 0010:close_ctree+0x222/0x4d0 [btrfs]
[ 1903.455417] Code: 4d c0 48 c7 c6 a0 92 4d c0 48 c7 c7 78 82 4d c0 e8
63 22 36 d7 90 0f 0b f0 80 4b 10 02 48 89 df e8 33 dc fb ff 84 c0 74 13
90 <0f> 0b 90 48 c7 c6 c8 92 4d c0 48 89 df e8 0c 22 01 00 48 89 df e8
[ 1903.458317] RSP: 0018:ffffb4465283be00 EFLAGS: 00010202
[ 1903.459159] RAX: 0000000000000001 RBX: ffffa1a1818e1000 RCX:
0000000000000001
[ 1903.460286] RDX: 0000000000000000 RSI: ffffb4465283bbe0 RDI:
ffffa1a19374fcb8
[ 1903.461408] RBP: ffffa1a1818e13c0 R08: 0000000100028b16 R09:
0000000000000000
[ 1903.462555] R10: 0000000000000003 R11: 0000000000000003 R12:
ffffa1a18ad7972c
[ 1903.463679] R13: 0000000000000000 R14: 0000000000000000 R15:
0000000000000000
[ 1903.464803] FS:  00007f9168312b80(0000) GS:ffffa1a4afcc0000(0000)
knlGS:0000000000000000
[ 1903.466082] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1903.467004] CR2: 00007f91683c9140 CR3: 000000010acaa000 CR4:
00000000000006f0
[ 1903.468124] Call Trace:
[ 1903.468548]  <TASK>
[ 1903.468890]  ? close_ctree+0x222/0x4d0 [btrfs]
[ 1903.469689]  ? __warn.cold+0x8e/0xea
[ 1903.470260]  ? close_ctree+0x222/0x4d0 [btrfs]
[ 1903.471052]  ? report_bug+0xff/0x140
[ 1903.471646]  ? handle_bug+0x3b/0x70
[ 1903.472212]  ? exc_invalid_op+0x17/0x70
[ 1903.472838]  ? asm_exc_invalid_op+0x1a/0x20
[ 1903.473518]  ? close_ctree+0x222/0x4d0 [btrfs]
[ 1903.474283]  generic_shutdown_super+0x70/0x160
[ 1903.475005]  kill_anon_super+0x11/0x40
[ 1903.475630]  btrfs_kill_super+0x11/0x20 [btrfs]
[ 1903.476405]  deactivate_locked_super+0x2e/0xa0
[ 1903.477125]  cleanup_mnt+0xb5/0x150
[ 1903.477699]  task_work_run+0x57/0x80
[ 1903.478267]  syscall_exit_to_user_mode+0x121/0x130
[ 1903.479056]  do_syscall_64+0xab/0x1a0
[ 1903.479658]  entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 1903.480467] RIP: 0033:0x7f916847a887
[ 1903.481034] Code: 0d 00 f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44
00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f
05 <48> 3d 00 f0 ff ff 77 01 c3 48 8b 15 71 25 0d 00 f7 d8 64 89 02 b8
[ 1903.483951] RSP: 002b:00007ffe035d1648 EFLAGS: 00000246 ORIG_RAX:
00000000000000a6
[ 1903.485153] RAX: 0000000000000000 RBX: 000056074eba0508 RCX:
00007f916847a887
[ 1903.486244] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
000056074eba0810
[ 1903.487128] RBP: 0000000000000000 R08: 00007ffe035d03f0 R09:
0000000000000001
[ 1903.488010] R10: 0000000000000103 R11: 0000000000000246 R12:
00007f91685cc22c
[ 1903.488905] R13: 000056074eba0810 R14: 0000000000000000 R15:
000056074eba0400
[ 1903.489792]  </TASK>
[ 1903.490071] ---[ end trace 0000000000000000 ]---
[ 1903.490657] BTRFS error (device dm-8 state EA): qgroup reserved space leaked

Cases 2 and 3 in the out_reserve path both pertain to this type of leak
and must free the reserved qgroup data. Because it is already an error
path, I opted not to handle the possible errors in
btrfs_qgroup_free_data.

Signed-off-by: Boris Burkov <boris@bur.io>
---
 fs/btrfs/inode.c | 3 +++
 1 file changed, 3 insertions(+)

Comments

Qu Wenruo July 23, 2024, 11:24 p.m. UTC | #1
在 2024/7/24 08:25, Boris Burkov 写道:
> In the buffered write path, the dirty page owns the qgroup rsv until it
> creates an ordered_extent.
> 
> Therefore, any errors that occur before the ordered_extent is created
> must free that reservation, or else the space is leaked. The fstest
> generic/475 exercises various IO error paths, and is able to trigger
> errors in cow_file_range where we fail to get to allocating the ordered
> extent. Note that because we *do* clear delalloc, we are likely to
> remove the inode from the delalloc list, so the inodes/pages to not have
> invalidate/launder called on them in the commit abort path.
> 
> This results in failures at the unmount stage of the test that look like:
> 
> [ 1903.401193] BTRFS: error (device dm-8 state EA) in
> cleanup_transaction:2018: errno=-5 IO failure
> [ 1903.402686] BTRFS: error (device dm-8 state EA) in
> btrfs_replace_file_extents:2416: errno=-5 IO failure
> [ 1903.446415] BTRFS warning (device dm-8 state EA): qgroup 0/5 has
> unreleased space, type 0 rsv 28672
> [ 1903.447887] ------------[ cut here ]------------
> [ 1903.448645] WARNING: CPU: 3 PID: 22588 at fs/btrfs/disk-io.c:4333
> close_ctree+0x222/0x4d0 [btrfs]
> [ 1903.450130] Modules linked in: btrfs blake2b_generic libcrc32c xor
> zstd_compress raid6_pq
> [ 1903.451408] CPU: 3 PID: 22588 Comm: umount Kdump: loaded Tainted: G
> W          6.10.0-rc7-gab56fde445b8 #21
> [ 1903.453058] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> BIOS Arch Linux 1.16.3-1-1 04/01/2014
> [ 1903.454542] RIP: 0010:close_ctree+0x222/0x4d0 [btrfs]
> [ 1903.455417] Code: 4d c0 48 c7 c6 a0 92 4d c0 48 c7 c7 78 82 4d c0 e8
> 63 22 36 d7 90 0f 0b f0 80 4b 10 02 48 89 df e8 33 dc fb ff 84 c0 74 13
> 90 <0f> 0b 90 48 c7 c6 c8 92 4d c0 48 89 df e8 0c 22 01 00 48 89 df e8
> [ 1903.458317] RSP: 0018:ffffb4465283be00 EFLAGS: 00010202
> [ 1903.459159] RAX: 0000000000000001 RBX: ffffa1a1818e1000 RCX:
> 0000000000000001
> [ 1903.460286] RDX: 0000000000000000 RSI: ffffb4465283bbe0 RDI:
> ffffa1a19374fcb8
> [ 1903.461408] RBP: ffffa1a1818e13c0 R08: 0000000100028b16 R09:
> 0000000000000000
> [ 1903.462555] R10: 0000000000000003 R11: 0000000000000003 R12:
> ffffa1a18ad7972c
> [ 1903.463679] R13: 0000000000000000 R14: 0000000000000000 R15:
> 0000000000000000
> [ 1903.464803] FS:  00007f9168312b80(0000) GS:ffffa1a4afcc0000(0000)
> knlGS:0000000000000000
> [ 1903.466082] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 1903.467004] CR2: 00007f91683c9140 CR3: 000000010acaa000 CR4:
> 00000000000006f0
> [ 1903.468124] Call Trace:
> [ 1903.468548]  <TASK>
> [ 1903.468890]  ? close_ctree+0x222/0x4d0 [btrfs]
> [ 1903.469689]  ? __warn.cold+0x8e/0xea
> [ 1903.470260]  ? close_ctree+0x222/0x4d0 [btrfs]
> [ 1903.471052]  ? report_bug+0xff/0x140
> [ 1903.471646]  ? handle_bug+0x3b/0x70
> [ 1903.472212]  ? exc_invalid_op+0x17/0x70
> [ 1903.472838]  ? asm_exc_invalid_op+0x1a/0x20
> [ 1903.473518]  ? close_ctree+0x222/0x4d0 [btrfs]
> [ 1903.474283]  generic_shutdown_super+0x70/0x160
> [ 1903.475005]  kill_anon_super+0x11/0x40
> [ 1903.475630]  btrfs_kill_super+0x11/0x20 [btrfs]
> [ 1903.476405]  deactivate_locked_super+0x2e/0xa0
> [ 1903.477125]  cleanup_mnt+0xb5/0x150
> [ 1903.477699]  task_work_run+0x57/0x80
> [ 1903.478267]  syscall_exit_to_user_mode+0x121/0x130
> [ 1903.479056]  do_syscall_64+0xab/0x1a0
> [ 1903.479658]  entry_SYSCALL_64_after_hwframe+0x77/0x7f
> [ 1903.480467] RIP: 0033:0x7f916847a887
> [ 1903.481034] Code: 0d 00 f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44
> 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f
> 05 <48> 3d 00 f0 ff ff 77 01 c3 48 8b 15 71 25 0d 00 f7 d8 64 89 02 b8
> [ 1903.483951] RSP: 002b:00007ffe035d1648 EFLAGS: 00000246 ORIG_RAX:
> 00000000000000a6
> [ 1903.485153] RAX: 0000000000000000 RBX: 000056074eba0508 RCX:
> 00007f916847a887
> [ 1903.486244] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
> 000056074eba0810
> [ 1903.487128] RBP: 0000000000000000 R08: 00007ffe035d03f0 R09:
> 0000000000000001
> [ 1903.488010] R10: 0000000000000103 R11: 0000000000000246 R12:
> 00007f91685cc22c
> [ 1903.488905] R13: 000056074eba0810 R14: 0000000000000000 R15:
> 000056074eba0400
> [ 1903.489792]  </TASK>
> [ 1903.490071] ---[ end trace 0000000000000000 ]---
> [ 1903.490657] BTRFS error (device dm-8 state EA): qgroup reserved space leaked
> 
> Cases 2 and 3 in the out_reserve path both pertain to this type of leak
> and must free the reserved qgroup data. Because it is already an error
> path, I opted not to handle the possible errors in
> btrfs_free_qgroup_data.
> 
> Signed-off-by: Boris Burkov <boris@bur.io>

Reviewed-by: Qu Wenruo <wqu@suse.com>

Thanks,
Qu
> ---
>   fs/btrfs/inode.c | 3 +++
>   1 file changed, 3 insertions(+)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index c5155981f99a..06337aee856a 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -1581,6 +1581,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>   					     locked_page, &cached,
>   					     clear_bits,
>   					     page_ops);
> +		btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
>   		start += cur_alloc_size;
>   	}
>   
> @@ -1594,6 +1595,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>   		clear_bits |= EXTENT_CLEAR_DATA_RESV;
>   		extent_clear_unlock_delalloc(inode, start, end, locked_page,
>   					     &cached, clear_bits, page_ops);
> +		btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
>   	}
>   	return ret;
>   }
> @@ -2255,6 +2257,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
>   					     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
>   					     PAGE_START_WRITEBACK |
>   					     PAGE_END_WRITEBACK);
> +		btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL);
>   	}
>   	btrfs_free_path(path);
>   	return ret;
diff mbox series

Patch

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c5155981f99a..06337aee856a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1581,6 +1581,7 @@  static noinline int cow_file_range(struct btrfs_inode *inode,
 					     locked_page, &cached,
 					     clear_bits,
 					     page_ops);
+		btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
 		start += cur_alloc_size;
 	}
 
@@ -1594,6 +1595,7 @@  static noinline int cow_file_range(struct btrfs_inode *inode,
 		clear_bits |= EXTENT_CLEAR_DATA_RESV;
 		extent_clear_unlock_delalloc(inode, start, end, locked_page,
 					     &cached, clear_bits, page_ops);
+		btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
 	}
 	return ret;
 }
@@ -2255,6 +2257,7 @@  static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
 					     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
 					     PAGE_START_WRITEBACK |
 					     PAGE_END_WRITEBACK);
+		btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL);
 	}
 	btrfs_free_path(path);
 	return ret;