Message ID | 2db650ec206db1cb3e68590951b59e222fb10116.1598792561.git.anand.jain@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups | expand |
On 30.08.20 г. 17:40 ч., Anand Jain wrote: > The following test case leads to null kobject-being-freed error. > > mount seed /mnt > add sprout to /mnt > umount /mnt > mount sprout to /mnt > delete seed > > kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called. > WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350 > RIP: 0010:kobject_put+0x80/0x350 > :: > Call Trace: > btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs] > btrfs_rm_device.cold+0xa8/0x298 [btrfs] > btrfs_ioctl+0x206c/0x22a0 [btrfs] > ksys_ioctl+0xe2/0x140 > __x64_sys_ioctl+0x1e/0x29 > do_syscall_64+0x96/0x150 > entry_SYSCALL_64_after_hwframe+0x44/0xa9 > RIP: 0033:0x7f4047c6288b > :: > > This is because, at the end of the seed device-delete, we try to remove > the seed's devid sysfs entry. But for the seed devices under the sprout > fs, we don't initialize the devid kobject yet. So this patch initializes > the seed device devid kobject and the device link in the sysfs. This takes > care of the Warning. > > Signed-off-by: Anand Jain <anand.jain@oracle.com> This patch is doing too many things at once. Split it so that: 1. You first add the new functions add_device/remove_device in 2 separate patches. 2. Switch btrfs_sysfs_add_devices_dir/btrfs_sysfs_remove_devices_dir to ousing the newly added helpers, again in 2 separate patches > --- > fs/btrfs/sysfs.c | 146 ++++++++++++++++++++++++++++++----------------- > 1 file changed, 93 insertions(+), 53 deletions(-) > > diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c > index 190e59152be5..9b5e58091fae 100644 > --- a/fs/btrfs/sysfs.c > +++ b/fs/btrfs/sysfs.c > @@ -1149,45 +1149,48 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info, > return 0; > } > > -/* when one_device is NULL, it removes all device links */ > - > -int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices, > - struct btrfs_device *one_device) > +static void btrfs_sysfs_remove_device(struct btrfs_device *device) > { > struct hd_struct *disk; > struct kobject *disk_kobj; > + struct kobject *devices_kobj; > > - if (!fs_devices->devices_kobj) > - return -EINVAL; > + /* > + * Seed fs_devices devices_kobj aren't used, fetch kobject from the > + * fs_info::fs_devices. > + */ > + devices_kobj = device->fs_info->fs_devices->devices_kobj; > + ASSERT(devices_kobj); > > - if (one_device) { > - if (one_device->bdev) { > - disk = one_device->bdev->bd_part; > - disk_kobj = &part_to_dev(disk)->kobj; > - sysfs_remove_link(fs_devices->devices_kobj, > - disk_kobj->name); > - } > + if (device->bdev) { > + disk = device->bdev->bd_part; > + disk_kobj = &part_to_dev(disk)->kobj; > + sysfs_remove_link(devices_kobj, disk_kobj->name); > + } > > - kobject_del(&one_device->devid_kobj); > - kobject_put(&one_device->devid_kobj); > + kobject_del(&device->devid_kobj); > + kobject_put(&device->devid_kobj); > > - wait_for_completion(&one_device->kobj_unregister); > + wait_for_completion(&device->kobj_unregister); > +} > > +/* when 2nd argument device is NULL, it removes all devices link */ > +int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices, > + struct btrfs_device *device) > +{ > + struct btrfs_fs_devices *seed_fs_devices; > + > + if (device) { > + btrfs_sysfs_remove_device(device); > return 0; > } > > - list_for_each_entry(one_device, &fs_devices->devices, dev_list) { > - > - if (one_device->bdev) { > - disk = one_device->bdev->bd_part; > - disk_kobj = &part_to_dev(disk)->kobj; > - sysfs_remove_link(fs_devices->devices_kobj, > - disk_kobj->name); > - } > - kobject_del(&one_device->devid_kobj); > - kobject_put(&one_device->devid_kobj); > + list_for_each_entry(device, &fs_devices->devices, dev_list) > + btrfs_sysfs_remove_device(device); > > - wait_for_completion(&one_device->kobj_unregister); > + list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) { > + list_for_each_entry(device, &seed_fs_devices->devices, dev_list) > + btrfs_sysfs_remove_device(device); > } > > return 0; > @@ -1271,44 +1274,81 @@ static struct kobj_type devid_ktype = { > .release = btrfs_release_devid_kobj, > }; > > +static int btrfs_sysfs_add_device(struct btrfs_device *device) > +{ > + int ret; > + struct kobject *devices_kobj; > + struct kobject *devinfo_kobj; > + > + /* > + * make sure we use the fs_info::fs_devices to fetch the kobjects > + * even for the seed fs_devices > + */ > + devices_kobj = device->fs_devices->fs_info->fs_devices->devices_kobj; > + devinfo_kobj = device->fs_devices->fs_info->fs_devices->devinfo_kobj; > + ASSERT(devices_kobj); > + ASSERT(devinfo_kobj); > + > + if (device->bdev) { > + struct hd_struct *disk; > + struct kobject *disk_kobj; > + > + disk = device->bdev->bd_part; > + disk_kobj = &part_to_dev(disk)->kobj; > + > + ret = sysfs_create_link(devices_kobj, disk_kobj, > + disk_kobj->name); > + if (ret) { > + btrfs_warn(device->fs_info, > + "sysfs create device link failed %d devid %llu", > + ret, device->devid); > + return ret; > + } > + } > + > + init_completion(&device->kobj_unregister); > + ret = kobject_init_and_add(&device->devid_kobj, &devid_ktype, > + devinfo_kobj, "%llu", device->devid); > + if (ret) { > + kobject_put(&device->devid_kobj); > + btrfs_warn(device->fs_info, > + "sysfs devinfo init failed %d devid %llu", > + ret, device->devid); > + } > + > + return ret; > +} > + > int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices, > - struct btrfs_device *one_device) > + struct btrfs_device *device) > { > - int error = 0; > - struct btrfs_device *dev; > + int ret = 0; > unsigned int nofs_flag; > + struct btrfs_fs_devices *seed_fs_devices; > > nofs_flag = memalloc_nofs_save(); > - list_for_each_entry(dev, &fs_devices->devices, dev_list) { > > - if (one_device && one_device != dev) > - continue; > + if (device) > + return btrfs_sysfs_add_device(device); > > - if (dev->bdev) { > - struct hd_struct *disk; > - struct kobject *disk_kobj; > - > - disk = dev->bdev->bd_part; > - disk_kobj = &part_to_dev(disk)->kobj; > - > - error = sysfs_create_link(fs_devices->devices_kobj, > - disk_kobj, disk_kobj->name); > - if (error) > - break; > - } > + list_for_each_entry(device, &fs_devices->devices, dev_list) { > + ret = btrfs_sysfs_add_device(device); > + if (ret) > + goto out; > + } > > - init_completion(&dev->kobj_unregister); > - error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype, > - fs_devices->devinfo_kobj, "%llu", > - dev->devid); > - if (error) { > - kobject_put(&dev->devid_kobj); > - break; > + list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) { > + list_for_each_entry(device, &seed_fs_devices->devices, dev_list) { > + ret = btrfs_sysfs_add_device(device); > + if (ret) > + goto out; > } > } > + > +out: > memalloc_nofs_restore(nofs_flag); > > - return error; > + return ret; > } > > void btrfs_kobject_uevent(struct block_device *bdev, enum kobject_action action) >
On 31/8/20 5:07 pm, Nikolay Borisov wrote: > > > On 30.08.20 г. 17:40 ч., Anand Jain wrote: >> The following test case leads to null kobject-being-freed error. >> >> mount seed /mnt >> add sprout to /mnt >> umount /mnt >> mount sprout to /mnt >> delete seed >> >> kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called. >> WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350 >> RIP: 0010:kobject_put+0x80/0x350 >> :: >> Call Trace: >> btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs] >> btrfs_rm_device.cold+0xa8/0x298 [btrfs] >> btrfs_ioctl+0x206c/0x22a0 [btrfs] >> ksys_ioctl+0xe2/0x140 >> __x64_sys_ioctl+0x1e/0x29 >> do_syscall_64+0x96/0x150 >> entry_SYSCALL_64_after_hwframe+0x44/0xa9 >> RIP: 0033:0x7f4047c6288b >> :: >> >> This is because, at the end of the seed device-delete, we try to remove >> the seed's devid sysfs entry. But for the seed devices under the sprout >> fs, we don't initialize the devid kobject yet. So this patch initializes >> the seed device devid kobject and the device link in the sysfs. This takes >> care of the Warning. >> >> Signed-off-by: Anand Jain <anand.jain@oracle.com> > > This patch is doing too many things at once. Split it so that: > 1. You first add the new functions add_device/remove_device in 2 > separate patches. > 2. Switch btrfs_sysfs_add_devices_dir/btrfs_sysfs_remove_devices_dir to > ousing the newly added helpers, again in 2 separate patches yeah and 3. Initialize seed devices devid kobject to fix the bug. fixing. -Anand > >> --- >> fs/btrfs/sysfs.c | 146 ++++++++++++++++++++++++++++++----------------- >> 1 file changed, 93 insertions(+), 53 deletions(-) >> >> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c >> index 190e59152be5..9b5e58091fae 100644 >> --- a/fs/btrfs/sysfs.c >> +++ b/fs/btrfs/sysfs.c >> @@ -1149,45 +1149,48 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info, >> return 0; >> } >> >> -/* when one_device is NULL, it removes all device links */ >> - >> -int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices, >> - struct btrfs_device *one_device) >> +static void btrfs_sysfs_remove_device(struct btrfs_device *device) >> { >> struct hd_struct *disk; >> struct kobject *disk_kobj; >> + struct kobject *devices_kobj; >> >> - if (!fs_devices->devices_kobj) >> - return -EINVAL; >> + /* >> + * Seed fs_devices devices_kobj aren't used, fetch kobject from the >> + * fs_info::fs_devices. >> + */ >> + devices_kobj = device->fs_info->fs_devices->devices_kobj; >> + ASSERT(devices_kobj); >> >> - if (one_device) { >> - if (one_device->bdev) { >> - disk = one_device->bdev->bd_part; >> - disk_kobj = &part_to_dev(disk)->kobj; >> - sysfs_remove_link(fs_devices->devices_kobj, >> - disk_kobj->name); >> - } >> + if (device->bdev) { >> + disk = device->bdev->bd_part; >> + disk_kobj = &part_to_dev(disk)->kobj; >> + sysfs_remove_link(devices_kobj, disk_kobj->name); >> + } >> >> - kobject_del(&one_device->devid_kobj); >> - kobject_put(&one_device->devid_kobj); >> + kobject_del(&device->devid_kobj); >> + kobject_put(&device->devid_kobj); >> >> - wait_for_completion(&one_device->kobj_unregister); >> + wait_for_completion(&device->kobj_unregister); >> +} >> >> +/* when 2nd argument device is NULL, it removes all devices link */ >> +int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices, >> + struct btrfs_device *device) >> +{ >> + struct btrfs_fs_devices *seed_fs_devices; >> + >> + if (device) { >> + btrfs_sysfs_remove_device(device); >> return 0; >> } >> >> - list_for_each_entry(one_device, &fs_devices->devices, dev_list) { >> - >> - if (one_device->bdev) { >> - disk = one_device->bdev->bd_part; >> - disk_kobj = &part_to_dev(disk)->kobj; >> - sysfs_remove_link(fs_devices->devices_kobj, >> - disk_kobj->name); >> - } >> - kobject_del(&one_device->devid_kobj); >> - kobject_put(&one_device->devid_kobj); >> + list_for_each_entry(device, &fs_devices->devices, dev_list) >> + btrfs_sysfs_remove_device(device); >> >> - wait_for_completion(&one_device->kobj_unregister); >> + list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) { >> + list_for_each_entry(device, &seed_fs_devices->devices, dev_list) >> + btrfs_sysfs_remove_device(device); >> } >> >> return 0; >> @@ -1271,44 +1274,81 @@ static struct kobj_type devid_ktype = { >> .release = btrfs_release_devid_kobj, >> }; >> >> +static int btrfs_sysfs_add_device(struct btrfs_device *device) >> +{ >> + int ret; >> + struct kobject *devices_kobj; >> + struct kobject *devinfo_kobj; >> + >> + /* >> + * make sure we use the fs_info::fs_devices to fetch the kobjects >> + * even for the seed fs_devices >> + */ >> + devices_kobj = device->fs_devices->fs_info->fs_devices->devices_kobj; >> + devinfo_kobj = device->fs_devices->fs_info->fs_devices->devinfo_kobj; >> + ASSERT(devices_kobj); >> + ASSERT(devinfo_kobj); >> + >> + if (device->bdev) { >> + struct hd_struct *disk; >> + struct kobject *disk_kobj; >> + >> + disk = device->bdev->bd_part; >> + disk_kobj = &part_to_dev(disk)->kobj; >> + >> + ret = sysfs_create_link(devices_kobj, disk_kobj, >> + disk_kobj->name); >> + if (ret) { >> + btrfs_warn(device->fs_info, >> + "sysfs create device link failed %d devid %llu", >> + ret, device->devid); >> + return ret; >> + } >> + } >> + >> + init_completion(&device->kobj_unregister); >> + ret = kobject_init_and_add(&device->devid_kobj, &devid_ktype, >> + devinfo_kobj, "%llu", device->devid); >> + if (ret) { >> + kobject_put(&device->devid_kobj); >> + btrfs_warn(device->fs_info, >> + "sysfs devinfo init failed %d devid %llu", >> + ret, device->devid); >> + } >> + >> + return ret; >> +} >> + >> int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices, >> - struct btrfs_device *one_device) >> + struct btrfs_device *device) >> { >> - int error = 0; >> - struct btrfs_device *dev; >> + int ret = 0; >> unsigned int nofs_flag; >> + struct btrfs_fs_devices *seed_fs_devices; >> >> nofs_flag = memalloc_nofs_save(); >> - list_for_each_entry(dev, &fs_devices->devices, dev_list) { >> >> - if (one_device && one_device != dev) >> - continue; >> + if (device) >> + return btrfs_sysfs_add_device(device); >> >> - if (dev->bdev) { >> - struct hd_struct *disk; >> - struct kobject *disk_kobj; >> - >> - disk = dev->bdev->bd_part; >> - disk_kobj = &part_to_dev(disk)->kobj; >> - >> - error = sysfs_create_link(fs_devices->devices_kobj, >> - disk_kobj, disk_kobj->name); >> - if (error) >> - break; >> - } >> + list_for_each_entry(device, &fs_devices->devices, dev_list) { >> + ret = btrfs_sysfs_add_device(device); >> + if (ret) >> + goto out; >> + } >> >> - init_completion(&dev->kobj_unregister); >> - error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype, >> - fs_devices->devinfo_kobj, "%llu", >> - dev->devid); >> - if (error) { >> - kobject_put(&dev->devid_kobj); >> - break; >> + list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) { >> + list_for_each_entry(device, &seed_fs_devices->devices, dev_list) { >> + ret = btrfs_sysfs_add_device(device); >> + if (ret) >> + goto out; >> } >> } >> + >> +out: >> memalloc_nofs_restore(nofs_flag); >> >> - return error; >> + return ret; >> } >> >> void btrfs_kobject_uevent(struct block_device *bdev, enum kobject_action action) >>
On 8/30/20 10:40 AM, Anand Jain wrote: > The following test case leads to null kobject-being-freed error. > > mount seed /mnt > add sprout to /mnt > umount /mnt > mount sprout to /mnt > delete seed > > kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called. > WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350 > RIP: 0010:kobject_put+0x80/0x350 > :: > Call Trace: > btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs] > btrfs_rm_device.cold+0xa8/0x298 [btrfs] > btrfs_ioctl+0x206c/0x22a0 [btrfs] > ksys_ioctl+0xe2/0x140 > __x64_sys_ioctl+0x1e/0x29 > do_syscall_64+0x96/0x150 > entry_SYSCALL_64_after_hwframe+0x44/0xa9 > RIP: 0033:0x7f4047c6288b > :: > > This is because, at the end of the seed device-delete, we try to remove > the seed's devid sysfs entry. But for the seed devices under the sprout > fs, we don't initialize the devid kobject yet. So this patch initializes > the seed device devid kobject and the device link in the sysfs. This takes > care of the Warning. > > Signed-off-by: Anand Jain <anand.jain@oracle.com> > --- Ok again, this will not work. Mount a seed fs, you get fs_info->fs_devices pointed at the seed device, and fs_info->fs_devices->devices_kobj is what is initialized. Now you sprout. This does clone_fs_devices(fs_info->fs_devices), which doesn't copy over fs_fdevices->devices_kobj. Now we take this clone device, and set fs_info->fsdevices to the cloned fs_devices, and we add the original fs_devices, which had the sysfs objects init'ed already, to fs_devices->seed_list. It appears to me you are completely ignoring this aspect of sprout. Maybe I'm missing something, but I've gone through this code twice now to see if what I think is happening is actually happening, and I'm convinced this is wrong. If it's _not_ wrong, and I _am_ missing something, then you need to explain why I'm wrong, and then go back and fix what needs to be fixed to make this whole process more clear, and _then_ you can do this patch series. Thanks, Josef
On 1/9/20 12:21 am, Josef Bacik wrote: > On 8/30/20 10:40 AM, Anand Jain wrote: >> The following test case leads to null kobject-being-freed error. >> >> mount seed /mnt >> add sprout to /mnt >> umount /mnt >> mount sprout to /mnt >> delete seed >> >> kobject: '(null)' (00000000dd2b87e4): is not initialized, yet >> kobject_put() is being called. >> WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350 >> RIP: 0010:kobject_put+0x80/0x350 >> :: >> Call Trace: >> btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs] >> btrfs_rm_device.cold+0xa8/0x298 [btrfs] >> btrfs_ioctl+0x206c/0x22a0 [btrfs] >> ksys_ioctl+0xe2/0x140 >> __x64_sys_ioctl+0x1e/0x29 >> do_syscall_64+0x96/0x150 >> entry_SYSCALL_64_after_hwframe+0x44/0xa9 >> RIP: 0033:0x7f4047c6288b >> :: >> >> This is because, at the end of the seed device-delete, we try to remove >> the seed's devid sysfs entry. But for the seed devices under the sprout >> fs, we don't initialize the devid kobject yet. So this patch initializes >> the seed device devid kobject and the device link in the sysfs. This >> takes >> care of the Warning. >> >> Signed-off-by: Anand Jain <anand.jain@oracle.com> >> --- > > Ok again, this will not work. > This comment was answered here [1] before. [1] https://patchwork.kernel.org/patch/11729425/ Again let me verify with the boilerplate code [2], which dumps btrfs_fs_devices and btrfs_device structures along with its memory address. [2] https://github.com/asj/bbp.git > Mount a seed fs, you get fs_info->fs_devices pointed at the seed device, > and fs_info->fs_devices->devices_kobj is what is initialized. Right. $ mount /dev/sda /btrfs mount: /btrfs: WARNING: device write-protected, mounted read-only. Note our fs_devices is at 000000005c8322d4, which is paired with sda. $ cat /proc/fs/btrfs/devlist | egrep "fsid|addr|device:|kobj" [fsid: 938a1ff2-f493-4a9a-9ed3-3d2ace2f0fb2] fs_devs_addr: 000000005c8322d4 <-- fs_devices fsid_kobj_state: 1 fsid_kobj_insysfs: 1 kobj_state: 1 kobj_insysfs: 1 dev_addr: 0000000017aba761 device: /dev/sda devid_kobj: 1 > Now you sprout. The relevant code snippets are as below. ----------------------------- 2344 static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) 2361 seed_devices = alloc_fs_devices(NULL, NULL); 2371 old_devices = clone_fs_devices(fs_devices); 2377 list_add(&old_devices->fs_list, &fs_uuids); 2379 memcpy(seed_devices, fs_devices, sizeof(*seed_devices)); 2386 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, 2387 synchronize_rcu); 2396 list_add_tail(&seed_devices->seed_list, &fs_devices->seed_list); 2398 generate_random_uuid(fs_devices->fsid); ----------------------------- > This does clone_fs_devices(fs_info->fs_devices), which doesn't copy over > fs_fdevices->devices_kobj. Right. Line 2371. > Now we take this clone device, and set > fs_info->fsdevices to the cloned fs_devices, Hm. No we just add it to the fs_uuids. So that user doesn't have to scan again to mount the seed device at a different mount point. Line 2377. > and we add the original > fs_devices, which had the sysfs objects init'ed already, to > fs_devices->seed_list. No. Original fs_devices still remains with fs_info->fs_devices. Which moves _devices_ _only_ to seed_devices::devices Line 2386. Lets verify. $ btrfs dev add -f /dev/sdb /btrfs Our fs_devices was at 000000005c8322d4. And after device add, our fs_devices 000000005c8322d4 is pairing with the RW device sdb. And the original sda is moved under seed_devices. Ignore clone_fs_devices its of no use to our conversation here. Which goes away after btrfs dev scan --forget as its not mounted anymore. As shown below. $ cat /proc/fs/btrfs/devlist | egrep "fsid|addr|device:|kobj" [fsid: 938a1ff2-f493-4a9a-9ed3-3d2ace2f0fb2] fs_devs_addr: 00000000f0c691d1 <-- clone_fs_devices fsid_kobj_state: 0 fsid_kobj_insysfs: 0 kobj_state: null kobj_insysfs: null dev_addr: 000000006e24ab09 device: /dev/sda devid_kobj: null [fsid: 91294469-9c0e-45c7-8dc7-af3134ba8cf4] fs_devs_addr: 000000005c8322d4 <-- fs_devices fsid_kobj_state: 1 fsid_kobj_insysfs: 1 kobj_state: 1 kobj_insysfs: 1 dev_addr: 0000000039f25fa1 device: /dev/sdb devid_kobj: 1 [[seed_fsid: 938a1ff2-f493-4a9a-9ed3-3d2ace2f0fb2]] sprout_fsid: 91294469-9c0e-45c7-8dc7-af3134ba8cf4 fs_devs_addr: 000000005e19db25 <-- seed_devices fsid_kobj_state: 1 fsid_kobj_insysfs: 1 kobj_state: 1 kobj_insysfs: 1 dev_addr: 0000000017aba761 device: /dev/sda devid_kobj: 1 Clone_fs_devices is freed after forget $ btrfs dev scan --forget $ cat /proc/fs/btrfs/devlist | egrep "fsid|addr|device:|kobj" [fsid: 91294469-9c0e-45c7-8dc7-af3134ba8cf4] fs_devs_addr: 000000005c8322d4 <-- fs_devices fsid_kobj_state: 1 fsid_kobj_insysfs: 1 kobj_state: 1 kobj_insysfs: 1 dev_addr: 0000000039f25fa1 device: /dev/sdb devid_kobj: 1 [[seed_fsid: 938a1ff2-f493-4a9a-9ed3-3d2ace2f0fb2]] sprout_fsid: 91294469-9c0e-45c7-8dc7-af3134ba8cf4 fs_devs_addr: 000000005e19db25 <-- seed_devcies fsid_kobj_state: 1 fsid_kobj_insysfs: 1 kobj_state: 1 kobj_insysfs: 1 dev_addr: 0000000017aba761 device: /dev/sda devid_kobj: 1 HTH Anand
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 190e59152be5..9b5e58091fae 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1149,45 +1149,48 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info, return 0; } -/* when one_device is NULL, it removes all device links */ - -int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices, - struct btrfs_device *one_device) +static void btrfs_sysfs_remove_device(struct btrfs_device *device) { struct hd_struct *disk; struct kobject *disk_kobj; + struct kobject *devices_kobj; - if (!fs_devices->devices_kobj) - return -EINVAL; + /* + * Seed fs_devices devices_kobj aren't used, fetch kobject from the + * fs_info::fs_devices. + */ + devices_kobj = device->fs_info->fs_devices->devices_kobj; + ASSERT(devices_kobj); - if (one_device) { - if (one_device->bdev) { - disk = one_device->bdev->bd_part; - disk_kobj = &part_to_dev(disk)->kobj; - sysfs_remove_link(fs_devices->devices_kobj, - disk_kobj->name); - } + if (device->bdev) { + disk = device->bdev->bd_part; + disk_kobj = &part_to_dev(disk)->kobj; + sysfs_remove_link(devices_kobj, disk_kobj->name); + } - kobject_del(&one_device->devid_kobj); - kobject_put(&one_device->devid_kobj); + kobject_del(&device->devid_kobj); + kobject_put(&device->devid_kobj); - wait_for_completion(&one_device->kobj_unregister); + wait_for_completion(&device->kobj_unregister); +} +/* when 2nd argument device is NULL, it removes all devices link */ +int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices, + struct btrfs_device *device) +{ + struct btrfs_fs_devices *seed_fs_devices; + + if (device) { + btrfs_sysfs_remove_device(device); return 0; } - list_for_each_entry(one_device, &fs_devices->devices, dev_list) { - - if (one_device->bdev) { - disk = one_device->bdev->bd_part; - disk_kobj = &part_to_dev(disk)->kobj; - sysfs_remove_link(fs_devices->devices_kobj, - disk_kobj->name); - } - kobject_del(&one_device->devid_kobj); - kobject_put(&one_device->devid_kobj); + list_for_each_entry(device, &fs_devices->devices, dev_list) + btrfs_sysfs_remove_device(device); - wait_for_completion(&one_device->kobj_unregister); + list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) { + list_for_each_entry(device, &seed_fs_devices->devices, dev_list) + btrfs_sysfs_remove_device(device); } return 0; @@ -1271,44 +1274,81 @@ static struct kobj_type devid_ktype = { .release = btrfs_release_devid_kobj, }; +static int btrfs_sysfs_add_device(struct btrfs_device *device) +{ + int ret; + struct kobject *devices_kobj; + struct kobject *devinfo_kobj; + + /* + * make sure we use the fs_info::fs_devices to fetch the kobjects + * even for the seed fs_devices + */ + devices_kobj = device->fs_devices->fs_info->fs_devices->devices_kobj; + devinfo_kobj = device->fs_devices->fs_info->fs_devices->devinfo_kobj; + ASSERT(devices_kobj); + ASSERT(devinfo_kobj); + + if (device->bdev) { + struct hd_struct *disk; + struct kobject *disk_kobj; + + disk = device->bdev->bd_part; + disk_kobj = &part_to_dev(disk)->kobj; + + ret = sysfs_create_link(devices_kobj, disk_kobj, + disk_kobj->name); + if (ret) { + btrfs_warn(device->fs_info, + "sysfs create device link failed %d devid %llu", + ret, device->devid); + return ret; + } + } + + init_completion(&device->kobj_unregister); + ret = kobject_init_and_add(&device->devid_kobj, &devid_ktype, + devinfo_kobj, "%llu", device->devid); + if (ret) { + kobject_put(&device->devid_kobj); + btrfs_warn(device->fs_info, + "sysfs devinfo init failed %d devid %llu", + ret, device->devid); + } + + return ret; +} + int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices, - struct btrfs_device *one_device) + struct btrfs_device *device) { - int error = 0; - struct btrfs_device *dev; + int ret = 0; unsigned int nofs_flag; + struct btrfs_fs_devices *seed_fs_devices; nofs_flag = memalloc_nofs_save(); - list_for_each_entry(dev, &fs_devices->devices, dev_list) { - if (one_device && one_device != dev) - continue; + if (device) + return btrfs_sysfs_add_device(device); - if (dev->bdev) { - struct hd_struct *disk; - struct kobject *disk_kobj; - - disk = dev->bdev->bd_part; - disk_kobj = &part_to_dev(disk)->kobj; - - error = sysfs_create_link(fs_devices->devices_kobj, - disk_kobj, disk_kobj->name); - if (error) - break; - } + list_for_each_entry(device, &fs_devices->devices, dev_list) { + ret = btrfs_sysfs_add_device(device); + if (ret) + goto out; + } - init_completion(&dev->kobj_unregister); - error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype, - fs_devices->devinfo_kobj, "%llu", - dev->devid); - if (error) { - kobject_put(&dev->devid_kobj); - break; + list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) { + list_for_each_entry(device, &seed_fs_devices->devices, dev_list) { + ret = btrfs_sysfs_add_device(device); + if (ret) + goto out; } } + +out: memalloc_nofs_restore(nofs_flag); - return error; + return ret; } void btrfs_kobject_uevent(struct block_device *bdev, enum kobject_action action)
The following test case leads to null kobject-being-freed error. mount seed /mnt add sprout to /mnt umount /mnt mount sprout to /mnt delete seed kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called. WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350 RIP: 0010:kobject_put+0x80/0x350 :: Call Trace: btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs] btrfs_rm_device.cold+0xa8/0x298 [btrfs] btrfs_ioctl+0x206c/0x22a0 [btrfs] ksys_ioctl+0xe2/0x140 __x64_sys_ioctl+0x1e/0x29 do_syscall_64+0x96/0x150 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f4047c6288b :: This is because, at the end of the seed device-delete, we try to remove the seed's devid sysfs entry. But for the seed devices under the sprout fs, we don't initialize the devid kobject yet. So this patch initializes the seed device devid kobject and the device link in the sysfs. This takes care of the Warning. Signed-off-by: Anand Jain <anand.jain@oracle.com> --- fs/btrfs/sysfs.c | 146 ++++++++++++++++++++++++++++++----------------- 1 file changed, 93 insertions(+), 53 deletions(-)