diff mbox

[v3] btrfs: allocate raid type kobjects dynamically

Message ID 5383EBE0.2000906@suse.com
State Superseded, archived
Headers show

Commit Message

Jeff Mahoney May 27, 2014, 1:35 a.m. UTC
We are currently allocating space_info objects in an array when we
allocate space_info. When a user does something like:

# btrfs balance start -mconvert=raid1 -dconvert=raid1 /mnt
# btrfs balance start -mconvert=single -dconvert=single /mnt -f
# btrfs balance start -mconvert=raid1 -dconvert=raid1 /

We can end up with memory corruption since the kobject hasn't
been reinitialized properly and the name pointer was left set.

The rationale behind allocating them statically was to avoid
creating a separate kobject container that just contained the
raid type. It used the index in the array to determine the index.

Ultimately, though, this wastes more memory than it saves in all
but the most complex scenarios and introduces kobject lifetime
questions.

This patch allocates the kobjects dynamically instead. Note that
we also remove the kobject_get/put of the parent kobject since
kobject_add and kobject_del do that internally.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 fs/btrfs/ctree.h       |    8 +++++++-
 fs/btrfs/extent-tree.c |   39 ++++++++++++++++++++++++++-------------
 fs/btrfs/sysfs.c       |    5 +++--
 3 files changed, 36 insertions(+), 16 deletions(-)

Comments

David Sterba May 27, 2014, 11:26 a.m. UTC | #1
A different lockdep chain, from test btrfs/014:

[  229.850151] ======================================================
[  229.850788] [ INFO: possible circular locking dependency detected ]
[  229.850788] 3.15.0-rc7-default+ #146 Tainted: G        W
[  229.850788] -------------------------------------------------------
[  229.850788] btrfs/26558 is trying to acquire lock:
[  229.850788]  (s_active#149){++++.+}, at: [<ffffffff811ee787>] kernfs_remove+0x27/0x40
[  229.850788]
[  229.850788] but task is already holding lock:
[  229.850788]  (&found->groups_sem){++++..}, at: [<ffffffffa0023abe>] btrfs_remove_block_group+0x24e/0x580 [btrfs]
[  229.850788]
[  229.850788] which lock already depends on the new lock.
[  229.850788]
[  229.850788]
[  229.850788] the existing dependency chain (in reverse order) is:
[  229.850788]
-> #1 (&found->groups_sem){++++..}:
[  229.850788]        [<ffffffff810b0662>] lock_acquire+0x92/0x120
[  229.850788]        [<ffffffff81a0bedc>] down_read+0x4c/0xa0
[  229.850788]        [<ffffffffa004babe>] raid_bytes_show+0x3e/0xd0 [btrfs]
[  229.850788]        [<ffffffff813b9ef6>] kobj_attr_show+0x16/0x20
[  229.850788]        [<ffffffff811f04e9>] sysfs_kf_seq_show+0xd9/0x230
[  229.850788]        [<ffffffff811eec56>] kernfs_seq_show+0x26/0x30
[  229.850788]        [<ffffffff8119ec8f>] seq_read+0xef/0x410
[  229.850788]        [<ffffffff811efa35>] kernfs_fop_read+0x125/0x180
[  229.850788]        [<ffffffff81179fe4>] vfs_read+0xb4/0x180
[  229.850788]        [<ffffffff8117a269>] SyS_read+0x59/0xd0
[  229.850788]        [<ffffffff81a16cd2>] system_call_fastpath+0x16/0x1b
[  229.850788]
-> #0 (s_active#149){++++.+}:
[  229.850788]        [<ffffffff810afc5c>] __lock_acquire+0x1c4c/0x1fa0
[  229.850788]        [<ffffffff810b0662>] lock_acquire+0x92/0x120
[  229.850788]        [<ffffffff811edc07>] __kernfs_remove+0x2b7/0x380
[  229.850788]        [<ffffffff811ee787>] kernfs_remove+0x27/0x40
[  229.850788]        [<ffffffff811f0b3a>] sysfs_remove_dir+0x5a/0x90
[  229.850788]        [<ffffffff813ba028>] kobject_del+0x18/0x90
[  229.850788]        [<ffffffffa0023cb2>] btrfs_remove_block_group+0x442/0x580 [btrfs]
[  229.850788]        [<ffffffffa005caf4>] btrfs_relocate_chunk+0x624/0x770 [btrfs]
[  229.850788]        [<ffffffffa005f652>] btrfs_balance+0x902/0xf50 [btrfs]
[  229.850788]        [<ffffffffa006b090>] btrfs_ioctl_balance+0x1e0/0x350 [btrfs]
[  229.850788]        [<ffffffffa006d4a9>] btrfs_ioctl+0xc39/0x1830 [btrfs]
[  229.850788]        [<ffffffff8118c381>] do_vfs_ioctl+0x91/0x560
[  229.850788]        [<ffffffff8118c8a3>] SyS_ioctl+0x53/0x80
[  229.850788]        [<ffffffff81a16cd2>] system_call_fastpath+0x16/0x1b
[  229.850788]
[  229.850788] other info that might help us debug this:
[  229.850788]
[  229.850788]  Possible unsafe locking scenario:
[  229.850788]
[  229.850788]        CPU0                    CPU1
[  229.850788]        ----                    ----
[  229.850788]   lock(&found->groups_sem);
[  229.850788]                                lock(s_active#149);
[  229.850788]                                lock(&found->groups_sem);
[  229.850788]   lock(s_active#149);
[  229.850788]
[  229.850788]  *** DEADLOCK ***
[  229.850788]
[  229.850788] 5 locks held by btrfs/26558:
[  229.850788]  #0:  (sb_writers#11){.+.+.+}, at: [<ffffffff8119a828>] mnt_want_write_file+0x28/0x60
[  229.850788]  #1:  (&fs_info->volume_mutex){+.+.+.}, at: [<ffffffffa006afe1>] btrfs_ioctl_balance+0x131/0x350 [btrfs]
[  229.850788]  #2:  (sb_internal){.+.+..}, at: [<ffffffffa0035436>] start_transaction+0x456/0x550 [btrfs]
[  229.850788]  #3:  (&fs_info->chunk_mutex){+.+...}, at: [<ffffffffa005927e>] lock_chunks+0x1e/0x20 [btrfs]
[  229.850788]  #4:  (&found->groups_sem){++++..}, at: [<ffffffffa0023abe>] btrfs_remove_block_group+0x24e/0x580 [btrfs]
[  229.850788]
[  229.850788] stack backtrace:
[  229.850788] CPU: 1 PID: 26558 Comm: btrfs Tainted: G        W     3.15.0-rc7-default+ #146
[  229.850788] Hardware name: Intel Corporation Santa Rosa platform/Matanzas, BIOS TSRSCRB1.86C.0047.B00.0610170821 10/17/06
[  229.850788]  ffffffff8281faf0 ffff8800652a1818 ffffffff81a07898 0000000000000001
[  229.850788]  ffffffff82827310 ffff8800652a1868 ffffffff810acb74 00000000001d4500
[  229.850788]  ffff8800652a18e8 0000000000000004 ffff880075e0ac78 0000000000000004
[  229.850788] Call Trace:
[  229.850788]  [<ffffffff81a07898>] dump_stack+0x51/0x71
[  229.850788]  [<ffffffff810acb74>] print_circular_bug+0x214/0x310
[  229.850788]  [<ffffffff810afc5c>] __lock_acquire+0x1c4c/0x1fa0
[  229.850788]  [<ffffffff811ee787>] ? kernfs_remove+0x27/0x40
[  229.850788]  [<ffffffff810b0662>] lock_acquire+0x92/0x120
[  229.850788]  [<ffffffff811ee787>] ? kernfs_remove+0x27/0x40
[  229.850788]  [<ffffffff81a0bb39>] ? mutex_unlock+0x9/0x10
[  229.850788]  [<ffffffff811edc07>] __kernfs_remove+0x2b7/0x380
[  229.850788]  [<ffffffff811ee787>] ? kernfs_remove+0x27/0x40
[  229.850788]  [<ffffffff811ee77f>] ? kernfs_remove+0x1f/0x40
[  229.850788]  [<ffffffff811ee787>] kernfs_remove+0x27/0x40
[  229.850788]  [<ffffffff811f0b3a>] sysfs_remove_dir+0x5a/0x90
[  229.850788]  [<ffffffff813ba028>] kobject_del+0x18/0x90
[  229.850788]  [<ffffffffa0023cb2>] btrfs_remove_block_group+0x442/0x580 [btrfs]
[  229.850788]  [<ffffffff810b112d>] ? trace_hardirqs_on_caller+0x11d/0x1e0
[  229.850788]  [<ffffffffa005caf4>] btrfs_relocate_chunk+0x624/0x770 [btrfs]
[  229.850788]  [<ffffffffa0056ac6>] ? release_extent_buffer+0x36/0xe0 [btrfs]
[  229.850788]  [<ffffffffa005706a>] ? free_extent_buffer+0x4a/0xc0 [btrfs]
[  229.850788]  [<ffffffffa0057080>] ? free_extent_buffer+0x60/0xc0 [btrfs]
[  229.850788]  [<ffffffffa005f652>] btrfs_balance+0x902/0xf50 [btrfs]
[  229.850788]  [<ffffffffa006b090>] ? btrfs_ioctl_balance+0x1e0/0x350 [btrfs]
[  229.850788]  [<ffffffff8116e3fa>] ? kmem_cache_alloc_trace+0x12a/0x180
[  229.850788]  [<ffffffffa006b090>] btrfs_ioctl_balance+0x1e0/0x350 [btrfs]
[  229.850788]  [<ffffffffa006c870>] ? update_ioctl_balance_args+0x130/0x130 [btrfs]
[  229.850788]  [<ffffffffa006d4a9>] btrfs_ioctl+0xc39/0x1830 [btrfs]
[  229.850788]  [<ffffffff8118c381>] do_vfs_ioctl+0x91/0x560
[  229.850788]  [<ffffffff81197959>] ? __fget_light+0x9/0x70
[  229.850788]  [<ffffffff811979c9>] ? __fdget+0x9/0x20
[  229.850788]  [<ffffffff8118c8a3>] SyS_ioctl+0x53/0x80
[  229.850788]  [<ffffffff81a16cd2>] system_call_fastpath+0x16/0x1b

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Mahoney May 27, 2014, 12:14 p.m. UTC | #2
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 5/27/14, 7:26 AM, David Sterba wrote:
> A different lockdep chain, from test btrfs/014:

Are you sure this one was with the new patch? We shouldn't be holding
groups_sem anymore in btrfs_remove_block_group while performing the
kobject ops.

- -Jeff

> [  229.850151]
> ====================================================== [
> 229.850788] [ INFO: possible circular locking dependency detected
> ] [  229.850788] 3.15.0-rc7-default+ #146 Tainted: G        W [
> 229.850788]
> ------------------------------------------------------- [
> 229.850788] btrfs/26558 is trying to acquire lock: [  229.850788]
> (s_active#149){++++.+}, at: [<ffffffff811ee787>]
> kernfs_remove+0x27/0x40 [  229.850788] [  229.850788] but task is
> already holding lock: [  229.850788]  (&found->groups_sem){++++..},
> at: [<ffffffffa0023abe>] btrfs_remove_block_group+0x24e/0x580
> [btrfs] [  229.850788] [  229.850788] which lock already depends on
> the new lock. [  229.850788] [  229.850788] [  229.850788] the
> existing dependency chain (in reverse order) is: [  229.850788] ->
> #1 (&found->groups_sem){++++..}: [  229.850788]
> [<ffffffff810b0662>] lock_acquire+0x92/0x120 [  229.850788]
> [<ffffffff81a0bedc>] down_read+0x4c/0xa0 [  229.850788]
> [<ffffffffa004babe>] raid_bytes_show+0x3e/0xd0 [btrfs] [
> 229.850788]        [<ffffffff813b9ef6>] kobj_attr_show+0x16/0x20 [
> 229.850788]        [<ffffffff811f04e9>]
> sysfs_kf_seq_show+0xd9/0x230 [  229.850788]
> [<ffffffff811eec56>] kernfs_seq_show+0x26/0x30 [  229.850788]
> [<ffffffff8119ec8f>] seq_read+0xef/0x410 [  229.850788]
> [<ffffffff811efa35>] kernfs_fop_read+0x125/0x180 [  229.850788]
> [<ffffffff81179fe4>] vfs_read+0xb4/0x180 [  229.850788]
> [<ffffffff8117a269>] SyS_read+0x59/0xd0 [  229.850788]
> [<ffffffff81a16cd2>] system_call_fastpath+0x16/0x1b [  229.850788] 
> -> #0 (s_active#149){++++.+}: [  229.850788]
> [<ffffffff810afc5c>] __lock_acquire+0x1c4c/0x1fa0 [  229.850788]
> [<ffffffff810b0662>] lock_acquire+0x92/0x120 [  229.850788]
> [<ffffffff811edc07>] __kernfs_remove+0x2b7/0x380 [  229.850788]
> [<ffffffff811ee787>] kernfs_remove+0x27/0x40 [  229.850788]
> [<ffffffff811f0b3a>] sysfs_remove_dir+0x5a/0x90 [  229.850788]
> [<ffffffff813ba028>] kobject_del+0x18/0x90 [  229.850788]
> [<ffffffffa0023cb2>] btrfs_remove_block_group+0x442/0x580 [btrfs] [
> 229.850788]        [<ffffffffa005caf4>]
> btrfs_relocate_chunk+0x624/0x770 [btrfs] [  229.850788]
> [<ffffffffa005f652>] btrfs_balance+0x902/0xf50 [btrfs] [
> 229.850788]        [<ffffffffa006b090>]
> btrfs_ioctl_balance+0x1e0/0x350 [btrfs] [  229.850788]
> [<ffffffffa006d4a9>] btrfs_ioctl+0xc39/0x1830 [btrfs] [
> 229.850788]        [<ffffffff8118c381>] do_vfs_ioctl+0x91/0x560 [
> 229.850788]        [<ffffffff8118c8a3>] SyS_ioctl+0x53/0x80 [
> 229.850788]        [<ffffffff81a16cd2>]
> system_call_fastpath+0x16/0x1b [  229.850788] [  229.850788] other
> info that might help us debug this: [  229.850788] [  229.850788]
> Possible unsafe locking scenario: [  229.850788] [  229.850788]
> CPU0                    CPU1 [  229.850788]        ----
> ---- [  229.850788]   lock(&found->groups_sem); [  229.850788]
> lock(s_active#149); [  229.850788]
> lock(&found->groups_sem); [  229.850788]   lock(s_active#149); [
> 229.850788] [  229.850788]  *** DEADLOCK *** [  229.850788] [
> 229.850788] 5 locks held by btrfs/26558: [  229.850788]  #0:
> (sb_writers#11){.+.+.+}, at: [<ffffffff8119a828>]
> mnt_want_write_file+0x28/0x60 [  229.850788]  #1:
> (&fs_info->volume_mutex){+.+.+.}, at: [<ffffffffa006afe1>]
> btrfs_ioctl_balance+0x131/0x350 [btrfs] [  229.850788]  #2:
> (sb_internal){.+.+..}, at: [<ffffffffa0035436>]
> start_transaction+0x456/0x550 [btrfs] [  229.850788]  #3:
> (&fs_info->chunk_mutex){+.+...}, at: [<ffffffffa005927e>]
> lock_chunks+0x1e/0x20 [btrfs] [  229.850788]  #4:
> (&found->groups_sem){++++..}, at: [<ffffffffa0023abe>]
> btrfs_remove_block_group+0x24e/0x580 [btrfs] [  229.850788] [
> 229.850788] stack backtrace: [  229.850788] CPU: 1 PID: 26558 Comm:
> btrfs Tainted: G        W     3.15.0-rc7-default+ #146 [
> 229.850788] Hardware name: Intel Corporation Santa Rosa
> platform/Matanzas, BIOS TSRSCRB1.86C.0047.B00.0610170821 10/17/06 [
> 229.850788]  ffffffff8281faf0 ffff8800652a1818 ffffffff81a07898
> 0000000000000001 [  229.850788]  ffffffff82827310 ffff8800652a1868
> ffffffff810acb74 00000000001d4500 [  229.850788]  ffff8800652a18e8
> 0000000000000004 ffff880075e0ac78 0000000000000004 [  229.850788]
> Call Trace: [  229.850788]  [<ffffffff81a07898>]
> dump_stack+0x51/0x71 [  229.850788]  [<ffffffff810acb74>]
> print_circular_bug+0x214/0x310 [  229.850788]  [<ffffffff810afc5c>]
> __lock_acquire+0x1c4c/0x1fa0 [  229.850788]  [<ffffffff811ee787>] ?
> kernfs_remove+0x27/0x40 [  229.850788]  [<ffffffff810b0662>]
> lock_acquire+0x92/0x120 [  229.850788]  [<ffffffff811ee787>] ?
> kernfs_remove+0x27/0x40 [  229.850788]  [<ffffffff81a0bb39>] ?
> mutex_unlock+0x9/0x10 [  229.850788]  [<ffffffff811edc07>]
> __kernfs_remove+0x2b7/0x380 [  229.850788]  [<ffffffff811ee787>] ?
> kernfs_remove+0x27/0x40 [  229.850788]  [<ffffffff811ee77f>] ?
> kernfs_remove+0x1f/0x40 [  229.850788]  [<ffffffff811ee787>]
> kernfs_remove+0x27/0x40 [  229.850788]  [<ffffffff811f0b3a>]
> sysfs_remove_dir+0x5a/0x90 [  229.850788]  [<ffffffff813ba028>]
> kobject_del+0x18/0x90 [  229.850788]  [<ffffffffa0023cb2>]
> btrfs_remove_block_group+0x442/0x580 [btrfs] [  229.850788]
> [<ffffffff810b112d>] ? trace_hardirqs_on_caller+0x11d/0x1e0 [
> 229.850788]  [<ffffffffa005caf4>] btrfs_relocate_chunk+0x624/0x770
> [btrfs] [  229.850788]  [<ffffffffa0056ac6>] ?
> release_extent_buffer+0x36/0xe0 [btrfs] [  229.850788]
> [<ffffffffa005706a>] ? free_extent_buffer+0x4a/0xc0 [btrfs] [
> 229.850788]  [<ffffffffa0057080>] ? free_extent_buffer+0x60/0xc0
> [btrfs] [  229.850788]  [<ffffffffa005f652>]
> btrfs_balance+0x902/0xf50 [btrfs] [  229.850788]
> [<ffffffffa006b090>] ? btrfs_ioctl_balance+0x1e0/0x350 [btrfs] [
> 229.850788]  [<ffffffff8116e3fa>] ?
> kmem_cache_alloc_trace+0x12a/0x180 [  229.850788]
> [<ffffffffa006b090>] btrfs_ioctl_balance+0x1e0/0x350 [btrfs] [
> 229.850788]  [<ffffffffa006c870>] ?
> update_ioctl_balance_args+0x130/0x130 [btrfs] [  229.850788]
> [<ffffffffa006d4a9>] btrfs_ioctl+0xc39/0x1830 [btrfs] [
> 229.850788]  [<ffffffff8118c381>] do_vfs_ioctl+0x91/0x560 [
> 229.850788]  [<ffffffff81197959>] ? __fget_light+0x9/0x70 [
> 229.850788]  [<ffffffff811979c9>] ? __fdget+0x9/0x20 [  229.850788]
> [<ffffffff8118c8a3>] SyS_ioctl+0x53/0x80 [  229.850788]
> [<ffffffff81a16cd2>] system_call_fastpath+0x16/0x1b
> 


- -- 
Jeff Mahoney
SUSE Labs
-----BEGIN PGP SIGNATURE-----
Version: GnuPG/MacGPG2 v2.0.22 (Darwin)

iQIcBAEBAgAGBQJThIGTAAoJEB57S2MheeWyxjUQAIYFZ2JM974A4fEkDxbz4yDK
Hcw0A6wU5Ykv28VwReix55GkbB1u3kqTOauf0nqAVgnL5mW4c3SETbrKVhnPkg+R
w5w26hUg7o9Ia+iCXNMJv+JDqXix6lHNHe7i927BDCF6BQ6+HjOAIRyi74GRZf3r
glhpa4HUmqVb2pYPlepu4USUBuyV705CEywLt0IkE4QBOvVBkTQKO4T503vBMZRg
morMC+Iz5p3Bndpn+oParBqt4rXKdk0U/cxdcg6j/UCopLB4D3JcChTaWTpYe311
ixsm+PI3tk6vO8KxuF6pO6wuXYOyEthARYHgJw/bPyDFHKFWXrpFiECdCEBT5jOd
YcxVss+9eTO7aZ8X5gqu6/Oj9MO7ZKPjVWjVHSo8hiSy8LUaD9rbg0MwT/LMTqmC
Zmz77GDpUEawQrMXhKkKe1CJdiHd+5hHizFzNZIVkdZLZklEPUCOjv9Y5GnzcYrC
rkl8KMzu+6qSMl5WZSFAsgxektBrUCygZCFuRvI2bB6CoEieC46cC28q45Nw3S+P
BKqX0iS2u0TIM4lj9lkYOWXyxl+0U/yEmkkwhvceUqX9BVTPD24/o6SA5XjYtsKU
6HuQx72jeLJoky5wAWjhtofhrbMjZ0ZbwDXXb8wCJ+hHk1RESqRpn+KpLWQsAuaB
6Klk0LvZvftPXWvrcuF8
=mpGl
-----END PGP SIGNATURE-----
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba May 27, 2014, 3:23 p.m. UTC | #3
On Tue, May 27, 2014 at 08:14:11AM -0400, Jeff Mahoney wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
> 
> On 5/27/14, 7:26 AM, David Sterba wrote:
> > A different lockdep chain, from test btrfs/014:
> 
> Are you sure this one was with the new patch? We shouldn't be holding
> groups_sem anymore in btrfs_remove_block_group while performing the
> kobject ops.

Right, the lockdep call chain does not match the sources. I've rebuilt &
rebooted with v3 and am not seeing any of the sysfs related messages.

Tested-by: David Sterba <dsterba@suse.cz>

The fix is for stable 3.13+ and even better if it lands in 3.15, as it
fixes a double-free that can randomly hit other subsystems due to
frequent kmalloc-32 slab reuse,
(eg. https://bugzilla.kernel.org/show_bug.cgi?id=72461).
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Mason May 27, 2014, 4:10 p.m. UTC | #4
On 05/26/2014 09:35 PM, Jeff Mahoney wrote:
> We are currently allocating space_info objects in an array when we
> allocate space_info. When a user does something like:
> 
> # btrfs balance start -mconvert=raid1 -dconvert=raid1 /mnt
> # btrfs balance start -mconvert=single -dconvert=single /mnt -f
> # btrfs balance start -mconvert=raid1 -dconvert=raid1 /
> 
> We can end up with memory corruption since the kobject hasn't
> been reinitialized properly and the name pointer was left set.
> 
> The rationale behind allocating them statically was to avoid
> creating a separate kobject container that just contained the
> raid type. It used the index in the array to determine the index.
> 
> Ultimately, though, this wastes more memory than it saves in all
> but the most complex scenarios and introduces kobject lifetime
> questions.
> 
> This patch allocates the kobjects dynamically instead. Note that
> we also remove the kobject_get/put of the parent kobject since
> kobject_add and kobject_del do that internally.

Thanks Jeff, one small thing below:

> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -8352,17 +8351,26 @@ static void __link_block_group(struct bt
>  	up_write(&space_info->groups_sem);
>  
>  	if (first) {
> -		struct kobject *kobj = &space_info->block_group_kobjs[index];
> +		struct raid_kobject *rkobj;
>  		int ret;
>  
> -		kobject_get(&space_info->kobj); /* put in release */
> -		ret = kobject_add(kobj, &space_info->kobj, "%s",
> -				  get_raid_name(index));
> +		rkobj = kzalloc(sizeof(*rkobj), GFP_KERNEL);
                                               ^^^^^^^^^^^^

GFP_NOFS?  We've got a transaction running here.

-chris
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Mahoney May 27, 2014, 4:58 p.m. UTC | #5
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 5/27/14, 12:10 PM, Chris Mason wrote:
> On 05/26/2014 09:35 PM, Jeff Mahoney wrote:
>> We are currently allocating space_info objects in an array when
>> we allocate space_info. When a user does something like:
>> 
>> # btrfs balance start -mconvert=raid1 -dconvert=raid1 /mnt #
>> btrfs balance start -mconvert=single -dconvert=single /mnt -f #
>> btrfs balance start -mconvert=raid1 -dconvert=raid1 /
>> 
>> We can end up with memory corruption since the kobject hasn't 
>> been reinitialized properly and the name pointer was left set.
>> 
>> The rationale behind allocating them statically was to avoid 
>> creating a separate kobject container that just contained the 
>> raid type. It used the index in the array to determine the
>> index.
>> 
>> Ultimately, though, this wastes more memory than it saves in all 
>> but the most complex scenarios and introduces kobject lifetime 
>> questions.
>> 
>> This patch allocates the kobjects dynamically instead. Note that 
>> we also remove the kobject_get/put of the parent kobject since 
>> kobject_add and kobject_del do that internally.
> 
> Thanks Jeff, one small thing below:
> 
>> --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@
>> -8352,17 +8351,26 @@ static void __link_block_group(struct bt 
>> up_write(&space_info->groups_sem);
>> 
>> if (first) { -		struct kobject *kobj =
>> &space_info->block_group_kobjs[index]; +		struct raid_kobject
>> *rkobj; int ret;
>> 
>> -		kobject_get(&space_info->kobj); /* put in release */ -		ret =
>> kobject_add(kobj, &space_info->kobj, "%s", -
>> get_raid_name(index)); +		rkobj = kzalloc(sizeof(*rkobj),
>> GFP_KERNEL);
> ^^^^^^^^^^^^
> 
> GFP_NOFS?  We've got a transaction running here.

Sigh. Yep.

- -Jeff

- -- 
Jeff Mahoney
SUSE Labs
-----BEGIN PGP SIGNATURE-----
Version: GnuPG/MacGPG2 v2.0.22 (Darwin)

iQIcBAEBAgAGBQJThMRPAAoJEB57S2MheeWyViAP/jVgxqPgISBaMkYX4aM3Cfcq
cO1U1oRFJfn6HQJ58zqqhpnRtgGHAlcoxt5j7GSZejUDi1ks274qJlIfqJBxhoeD
PtgJhIjIdTHs1hHrZprIvTEnEv1OpUq+xbfp3bPEEw+Bgu2oYyRhdZ/MMhFDSWuM
jlvhgmTROz1p7tPbxuFVKPo1dY4OmDFed8TqS+Cl2NygNW1vwNf5v8c00IInlE4R
VLUvhOaZCVSMwpG24ADVz0wAQDOtyogay2UHCY3XfLvH2sdtPiIdx4w+Zf9rOiFD
7IGyx6x1Cf57+H+fjQ2tMdhZuYU4dn1Tp1qxEbL1XivWP/weYLkurdA42XeOYSRt
IUjH6hP4/JX0eJIs4IFZErjlpgvBvcOeChKyUXgAUy+lEd7JV5CEoo58BjfnaCVY
e+GjpTaYHDx7xiuHg06nO2LcugRzVeiORlI5oR6s9reIEbC1SpKblK4jvMRFcM/2
j5GCK25i/bMTBTby50+dBojuMGVOXT7Fm42sK/lMv3OVu3h3zZq0N1Kq5GSLHgfn
rEr/l4HvCE6DCAb3Uqm+Gf/WZ45vd6/29Zt3/UHaOq+vr852H5SDL1qPS9zLCqnx
JwSj+PUK/Ns8fHjlIVweYc0sBgJdQcU8eW8M/m4X261i42veP0YC71P//N+M9PvF
qcsDs/YxPLUod5Itm1Zf
=C73I
-----END PGP SIGNATURE-----
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1113,6 +1113,12 @@  struct btrfs_qgroup_limit_item {
 	__le64 rsv_excl;
 } __attribute__ ((__packed__));
 
+/* For raid type sysfs entries */
+struct raid_kobject {
+	int raid_type;
+	struct kobject kobj;
+};
+
 struct btrfs_space_info {
 	spinlock_t lock;
 
@@ -1163,7 +1169,7 @@  struct btrfs_space_info {
 	wait_queue_head_t wait;
 
 	struct kobject kobj;
-	struct kobject block_group_kobjs[BTRFS_NR_RAID_TYPES];
+	struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
 };
 
 #define	BTRFS_BLOCK_RSV_GLOBAL		1
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3401,10 +3401,8 @@  static int update_space_info(struct btrf
 		return ret;
 	}
 
-	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
 		INIT_LIST_HEAD(&found->block_groups[i]);
-		kobject_init(&found->block_group_kobjs[i], &btrfs_raid_ktype);
-	}
 	init_rwsem(&found->groups_sem);
 	spin_lock_init(&found->lock);
 	found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
@@ -8327,8 +8325,9 @@  int btrfs_free_block_groups(struct btrfs
 		list_del(&space_info->list);
 		for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
 			struct kobject *kobj;
-			kobj = &space_info->block_group_kobjs[i];
-			if (kobj->parent) {
+			kobj = space_info->block_group_kobjs[i];
+			space_info->block_group_kobjs[i] = NULL;
+			if (kobj) {
 				kobject_del(kobj);
 				kobject_put(kobj);
 			}
@@ -8352,17 +8351,26 @@  static void __link_block_group(struct bt
 	up_write(&space_info->groups_sem);
 
 	if (first) {
-		struct kobject *kobj = &space_info->block_group_kobjs[index];
+		struct raid_kobject *rkobj;
 		int ret;
 
-		kobject_get(&space_info->kobj); /* put in release */
-		ret = kobject_add(kobj, &space_info->kobj, "%s",
-				  get_raid_name(index));
+		rkobj = kzalloc(sizeof(*rkobj), GFP_KERNEL);
+		if (!rkobj)
+			goto out_err;
+		rkobj->raid_type = index;
+		kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
+		ret = kobject_add(&rkobj->kobj, &space_info->kobj,
+				  "%s", get_raid_name(index));
 		if (ret) {
-			pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
-			kobject_put(&space_info->kobj);
+			kobject_put(&rkobj->kobj);
+			goto out_err;
 		}
+		space_info->block_group_kobjs[index] = &rkobj->kobj;
 	}
+
+	return;
+out_err:
+	pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
 }
 
 static struct btrfs_block_group_cache *
@@ -8697,6 +8705,7 @@  int btrfs_remove_block_group(struct btrf
 	struct btrfs_root *tree_root = root->fs_info->tree_root;
 	struct btrfs_key key;
 	struct inode *inode;
+	struct kobject *kobj = NULL;
 	int ret;
 	int index;
 	int factor;
@@ -8796,11 +8805,15 @@  int btrfs_remove_block_group(struct btrf
 	 */
 	list_del_init(&block_group->list);
 	if (list_empty(&block_group->space_info->block_groups[index])) {
-		kobject_del(&block_group->space_info->block_group_kobjs[index]);
-		kobject_put(&block_group->space_info->block_group_kobjs[index]);
+		kobj = block_group->space_info->block_group_kobjs[index];
+		block_group->space_info->block_group_kobjs[index] = NULL;
 		clear_avail_alloc_bits(root->fs_info, block_group->flags);
 	}
 	up_write(&block_group->space_info->groups_sem);
+	if (kobj) {
+		kobject_del(kobj);
+		kobject_put(kobj);
+	}
 
 	if (block_group->cached == BTRFS_CACHE_STARTED)
 		wait_block_group_cache_done(block_group);
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -254,6 +254,7 @@  static ssize_t global_rsv_reserved_show(
 BTRFS_ATTR(global_rsv_reserved, 0444, global_rsv_reserved_show);
 
 #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj)
+#define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj)
 
 static ssize_t raid_bytes_show(struct kobject *kobj,
 			       struct kobj_attribute *attr, char *buf);
@@ -266,7 +267,7 @@  static ssize_t raid_bytes_show(struct ko
 {
 	struct btrfs_space_info *sinfo = to_space_info(kobj->parent);
 	struct btrfs_block_group_cache *block_group;
-	int index = kobj - sinfo->block_group_kobjs;
+	int index = to_raid_kobj(kobj)->raid_type;
 	u64 val = 0;
 
 	down_read(&sinfo->groups_sem);
@@ -288,7 +289,7 @@  static struct attribute *raid_attributes
 
 static void release_raid_kobj(struct kobject *kobj)
 {
-	kobject_put(kobj->parent);
+	kfree(to_raid_kobj(kobj));
 }
 
 struct kobj_type btrfs_raid_ktype = {