diff mbox series

[1/2] btrfs: initialize sysfs devid and device link for seed device

Message ID 2c7ca821f53d71d6c1a4e1f1c969c1d8e686021a.1598012410.git.anand.jain@oracle.com
State New, archived
Headers show
Series [1/2] btrfs: initialize sysfs devid and device link for seed device | expand

Commit Message

Anand Jain Aug. 21, 2020, 1:15 p.m. UTC
The following test case triggers a warning that kobject_put() is being called on an uninitialized kobject.

 mount seed /mnt
 add sprout to /mnt
 umount /mnt
 mount sprout to /mnt
 delete seed

 kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
 WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
 RIP: 0010:kobject_put+0x80/0x350
 ::
 Call Trace:
 btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
 btrfs_rm_device.cold+0xa8/0x298 [btrfs]
 btrfs_ioctl+0x206c/0x22a0 [btrfs]
 ksys_ioctl+0xe2/0x140
 __x64_sys_ioctl+0x1e/0x29
 do_syscall_64+0x96/0x150
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f4047c6288b
 ::

This happens because, at the end of the seed device delete, we try to remove
the seed's devid sysfs entry, but the devid kobject is never initialized for
the seed devices under the sprout fs. So this patch initializes the seed
device's devid kobject and its device link in sysfs, which takes care of the
warning.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/sysfs.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

Comments

Josef Bacik Aug. 21, 2020, 2:36 p.m. UTC | #1
On 8/21/20 9:15 AM, Anand Jain wrote:
> The following test case leads to null kobject-being-freed error.
> 
>   mount seed /mnt
>   add sprout to /mnt
>   umount /mnt
>   mount sprout to /mnt
>   delete seed
> 
>   kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
>   WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
>   RIP: 0010:kobject_put+0x80/0x350
>   ::
>   Call Trace:
>   btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
>   btrfs_rm_device.cold+0xa8/0x298 [btrfs]
>   btrfs_ioctl+0x206c/0x22a0 [btrfs]
>   ksys_ioctl+0xe2/0x140
>   __x64_sys_ioctl+0x1e/0x29
>   do_syscall_64+0x96/0x150
>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>   RIP: 0033:0x7f4047c6288b
>   ::
> 
> This is because, at the end of the seed device-delete, we try to remove
> the seed's devid sysfs entry. But for the seed devices under the sprout
> fs, we don't initialize the devid kobject yet. So this patch initializes
> the seed device devid kobject and the device link in the sysfs. This takes
> care of the Warning.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
>   fs/btrfs/sysfs.c | 30 ++++++++++++++++++++----------
>   1 file changed, 20 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index 88fd4ce937b8..85403fc3d5c7 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -1154,20 +1154,20 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
>   /* when one_device is NULL, it removes all device links */
>   
>   int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
> -		struct btrfs_device *one_device)
> +				   struct btrfs_device *one_device)
>   {
>   	struct hd_struct *disk;
>   	struct kobject *disk_kobj;
> +	struct kobject *devices_kobj = fs_devices->devices_kobj;
>   
> -	if (!fs_devices->devices_kobj)
> +	if (!devices_kobj)
>   		return -EINVAL;
>   
>   	if (one_device) {
>   		if (one_device->bdev) {
>   			disk = one_device->bdev->bd_part;
>   			disk_kobj = &part_to_dev(disk)->kobj;
> -			sysfs_remove_link(fs_devices->devices_kobj,
> -					  disk_kobj->name);
> +			sysfs_remove_link(devices_kobj, disk_kobj->name);
>   		}
>   
>   		kobject_del(&one_device->devid_kobj);
> @@ -1178,19 +1178,23 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
>   		return 0;
>   	}
>   
> +again:
>   	list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
>   
>   		if (one_device->bdev) {
>   			disk = one_device->bdev->bd_part;
>   			disk_kobj = &part_to_dev(disk)->kobj;
> -			sysfs_remove_link(fs_devices->devices_kobj,
> -					  disk_kobj->name);
> +			sysfs_remove_link(devices_kobj, disk_kobj->name);
>   		}
>   		kobject_del(&one_device->devid_kobj);
>   		kobject_put(&one_device->devid_kobj);
>   
>   		wait_for_completion(&one_device->kobj_unregister);
>   	}
> +	while (fs_devices->seed) {
> +		fs_devices = fs_devices->seed;
> +		goto again;
> +	}
>   
>   	return 0;
>   }
> @@ -1279,8 +1283,11 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
>   	int error = 0;
>   	struct btrfs_device *dev;
>   	unsigned int nofs_flag;
> +	struct kobject *devices_kobj = fs_devices->devices_kobj;
> +	struct kobject *devinfo_kobj = fs_devices->devinfo_kobj;
>   
>   	nofs_flag = memalloc_nofs_save();
> +again:
>   	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
>   
>   		if (one_device && one_device != dev)
> @@ -1293,21 +1300,24 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
>   			disk = dev->bdev->bd_part;
>   			disk_kobj = &part_to_dev(disk)->kobj;
>   
> -			error = sysfs_create_link(fs_devices->devices_kobj,
> -						  disk_kobj, disk_kobj->name);
> +			error = sysfs_create_link(devices_kobj, disk_kobj,
> +						  disk_kobj->name);
>   			if (error)
>   				break;
>   		}
>   
>   		init_completion(&dev->kobj_unregister);
>   		error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
> -					     fs_devices->devinfo_kobj, "%llu",
> -					     dev->devid);
> +					     devinfo_kobj, "%llu", dev->devid);
>   		if (error) {
>   			kobject_put(&dev->devid_kobj);
>   			break;
>   		}
>   	}
> +	while(fs_devices->seed) {
> +		fs_devices = fs_devices->seed;
> +		goto again;
> +	}
>   	memalloc_nofs_restore(nofs_flag);
>   
>   	return error;
> 

So now we're using the main fs_devices->devices_kobj, i.e. the one that
belongs to the main fs_devices, with fs_devices->seed being the seed
fs_devices.  This is fine, except when we actually mount a seed device.  In
that case the fs_devices in use is the seed's.  If we then add a device,
we swap in the new fs_devices as the main fs_devices, and the seed
fs_devices, which holds the devices_kobj we actually used, ends up in
fs_devices->seed.  Thus we'll leak the sysfs objects for the seed devices.
Thanks,

Josef
Anand Jain Aug. 23, 2020, 1:05 p.m. UTC | #2
> 
> So now we're using the main fs_devices->devices_kobj, which is the main 
> fs_devices with fs_devices->seed being the seed fs_devices.  This is 
> fine, except when we actually mount a seed device, and in that case we 
> have fs_devices as the seed devices being used, and then if we add a 
> device we'll actually swap in the new fs_devices for the main 
> fs_devices, and we have the seed devices with the actual devices_kobj 
> that we used set in fs_devices->seed, and thus we'll leak the sysfs 
> objects for the seed devices.  Thanks,

Do you mean leaking the devinfo_kobj rather than the devices_kobj? If so,
then yes, and this patch fixes that as well (I just found out).

Otherwise, no, there isn't a devices_kobj leak.  We make sure that only the
mounted fsid has its devices_kobj initialized. So during sprouting, the
devices_kobj remains with fs_info->fs_devices, as we move the seed
devices under seed_devices.

static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
::
  list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
  synchronize_rcu);
  list_for_each_entry(device, &seed_devices->devices, dev_list)
  device->fs_devices = seed_devices;

And during unmount, we clean up the dev links and devices_kobj.

close_ctree()
   btrfs_sysfs_remove_mounted()
   btrfs_sysfs_remove_fsid()


Anand


> Josef
Anand Jain Aug. 29, 2020, 11:44 a.m. UTC | #3
On 21/8/20 9:15 pm, Anand Jain wrote:
> The following test case leads to null kobject-being-freed error.
> 
>   mount seed /mnt
>   add sprout to /mnt
>   umount /mnt
>   mount sprout to /mnt
>   delete seed
> 
>   kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
>   WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
>   RIP: 0010:kobject_put+0x80/0x350
>   ::
>   Call Trace:
>   btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
>   btrfs_rm_device.cold+0xa8/0x298 [btrfs]
>   btrfs_ioctl+0x206c/0x22a0 [btrfs]
>   ksys_ioctl+0xe2/0x140
>   __x64_sys_ioctl+0x1e/0x29
>   do_syscall_64+0x96/0x150
>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>   RIP: 0033:0x7f4047c6288b
>   ::
> 
> This is because, at the end of the seed device-delete, we try to remove
> the seed's devid sysfs entry. But for the seed devices under the sprout
> fs, we don't initialize the devid kobject yet. So this patch initializes
> the seed device devid kobject and the device link in the sysfs. This takes
> care of the Warning.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
>   fs/btrfs/sysfs.c | 30 ++++++++++++++++++++----------
>   1 file changed, 20 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index 88fd4ce937b8..85403fc3d5c7 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -1154,20 +1154,20 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
>   /* when one_device is NULL, it removes all device links */
>   
>   int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
> -		struct btrfs_device *one_device)
> +				   struct btrfs_device *one_device)
>   {
>   	struct hd_struct *disk;
>   	struct kobject *disk_kobj;
> +	struct kobject *devices_kobj = fs_devices->devices_kobj;
>   
> -	if (!fs_devices->devices_kobj)
> +	if (!devices_kobj)
>   		return -EINVAL;
>   
>   	if (one_device) {
>   		if (one_device->bdev) {
>   			disk = one_device->bdev->bd_part;
>   			disk_kobj = &part_to_dev(disk)->kobj;
> -			sysfs_remove_link(fs_devices->devices_kobj,
> -					  disk_kobj->name);
> +			sysfs_remove_link(devices_kobj, disk_kobj->name);
>   		}
>   
>   		kobject_del(&one_device->devid_kobj);
> @@ -1178,19 +1178,23 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
>   		return 0;
>   	}
>   
> +again:
>   	list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
>   
>   		if (one_device->bdev) {
>   			disk = one_device->bdev->bd_part;
>   			disk_kobj = &part_to_dev(disk)->kobj;
> -			sysfs_remove_link(fs_devices->devices_kobj,
> -					  disk_kobj->name);
> +			sysfs_remove_link(devices_kobj, disk_kobj->name);
>   		}
>   		kobject_del(&one_device->devid_kobj);
>   		kobject_put(&one_device->devid_kobj);
>   
>   		wait_for_completion(&one_device->kobj_unregister);
>   	}
> +	while (fs_devices->seed) {
> +		fs_devices = fs_devices->seed;
> +		goto again;
> +	}
>   
>   	return 0;
>   }
> @@ -1279,8 +1283,11 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
>   	int error = 0;
>   	struct btrfs_device *dev;
>   	unsigned int nofs_flag;
> +	struct kobject *devices_kobj = fs_devices->devices_kobj;
> +	struct kobject *devinfo_kobj = fs_devices->devinfo_kobj;
>   
>   	nofs_flag = memalloc_nofs_save();
> +again:
>   	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
>   
>   		if (one_device && one_device != dev)
> @@ -1293,21 +1300,24 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
>   			disk = dev->bdev->bd_part;
>   			disk_kobj = &part_to_dev(disk)->kobj;
>   
> -			error = sysfs_create_link(fs_devices->devices_kobj,
> -						  disk_kobj, disk_kobj->name);
> +			error = sysfs_create_link(devices_kobj, disk_kobj,
> +						  disk_kobj->name);
>   			if (error)
>   				break;
>   		}
>   
>   		init_completion(&dev->kobj_unregister);
>   		error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
> -					     fs_devices->devinfo_kobj, "%llu",
> -					     dev->devid);
> +					     devinfo_kobj, "%llu", dev->devid);
>   		if (error) {
>   			kobject_put(&dev->devid_kobj);
>   			break;
>   		}
>   	}
> +	while(fs_devices->seed) {
> +		fs_devices = fs_devices->seed;
> +		goto again;
> +	}
>   	memalloc_nofs_restore(nofs_flag);
>   
>   	return error;
> 


Ping?

Thanks, Anand
diff mbox series

Patch

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 88fd4ce937b8..85403fc3d5c7 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1154,20 +1154,20 @@  int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
 /* when one_device is NULL, it removes all device links */
 
 int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
-		struct btrfs_device *one_device)
+				   struct btrfs_device *one_device)
 {
 	struct hd_struct *disk;
 	struct kobject *disk_kobj;
+	struct kobject *devices_kobj = fs_devices->devices_kobj;
 
-	if (!fs_devices->devices_kobj)
+	if (!devices_kobj)
 		return -EINVAL;
 
 	if (one_device) {
 		if (one_device->bdev) {
 			disk = one_device->bdev->bd_part;
 			disk_kobj = &part_to_dev(disk)->kobj;
-			sysfs_remove_link(fs_devices->devices_kobj,
-					  disk_kobj->name);
+			sysfs_remove_link(devices_kobj, disk_kobj->name);
 		}
 
 		kobject_del(&one_device->devid_kobj);
@@ -1178,19 +1178,23 @@  int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
 		return 0;
 	}
 
+again:
 	list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
 
 		if (one_device->bdev) {
 			disk = one_device->bdev->bd_part;
 			disk_kobj = &part_to_dev(disk)->kobj;
-			sysfs_remove_link(fs_devices->devices_kobj,
-					  disk_kobj->name);
+			sysfs_remove_link(devices_kobj, disk_kobj->name);
 		}
 		kobject_del(&one_device->devid_kobj);
 		kobject_put(&one_device->devid_kobj);
 
 		wait_for_completion(&one_device->kobj_unregister);
 	}
+	while (fs_devices->seed) {
+		fs_devices = fs_devices->seed;
+		goto again;
+	}
 
 	return 0;
 }
@@ -1279,8 +1283,11 @@  int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
 	int error = 0;
 	struct btrfs_device *dev;
 	unsigned int nofs_flag;
+	struct kobject *devices_kobj = fs_devices->devices_kobj;
+	struct kobject *devinfo_kobj = fs_devices->devinfo_kobj;
 
 	nofs_flag = memalloc_nofs_save();
+again:
 	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
 
 		if (one_device && one_device != dev)
@@ -1293,21 +1300,24 @@  int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
 			disk = dev->bdev->bd_part;
 			disk_kobj = &part_to_dev(disk)->kobj;
 
-			error = sysfs_create_link(fs_devices->devices_kobj,
-						  disk_kobj, disk_kobj->name);
+			error = sysfs_create_link(devices_kobj, disk_kobj,
+						  disk_kobj->name);
 			if (error)
 				break;
 		}
 
 		init_completion(&dev->kobj_unregister);
 		error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
-					     fs_devices->devinfo_kobj, "%llu",
-					     dev->devid);
+					     devinfo_kobj, "%llu", dev->devid);
 		if (error) {
 			kobject_put(&dev->devid_kobj);
 			break;
 		}
 	}
+	while(fs_devices->seed) {
+		fs_devices = fs_devices->seed;
+		goto again;
+	}
 	memalloc_nofs_restore(nofs_flag);
 
 	return error;