diff mbox series

[01/11] btrfs: initialize sysfs devid and device link for seed device

Message ID 2db650ec206db1cb3e68590951b59e222fb10116.1598792561.git.anand.jain@oracle.com (mailing list archive)
State New, archived
Headers show
Series btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups | expand

Commit Message

Anand Jain Aug. 30, 2020, 2:40 p.m. UTC
The following test case leads to null kobject-being-freed error.

 mount seed /mnt
 add sprout to /mnt
 umount /mnt
 mount sprout to /mnt
 delete seed

 kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
 WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
 RIP: 0010:kobject_put+0x80/0x350
 ::
 Call Trace:
 btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
 btrfs_rm_device.cold+0xa8/0x298 [btrfs]
 btrfs_ioctl+0x206c/0x22a0 [btrfs]
 ksys_ioctl+0xe2/0x140
 __x64_sys_ioctl+0x1e/0x29
 do_syscall_64+0x96/0x150
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f4047c6288b
 ::

This is because, at the end of the seed device-delete, we try to remove
the seed's devid sysfs entry. But for the seed devices under the sprout
fs, we don't initialize the devid kobject yet. So this patch initializes
the seed device devid kobject and the device link in the sysfs. This takes
care of the Warning.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/sysfs.c | 146 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 93 insertions(+), 53 deletions(-)

Comments

Nikolay Borisov Aug. 31, 2020, 9:07 a.m. UTC | #1
On 30.08.20 г. 17:40 ч., Anand Jain wrote:
> The following test case leads to null kobject-being-freed error.
> 
>  mount seed /mnt
>  add sprout to /mnt
>  umount /mnt
>  mount sprout to /mnt
>  delete seed
> 
>  kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
>  WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
>  RIP: 0010:kobject_put+0x80/0x350
>  ::
>  Call Trace:
>  btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
>  btrfs_rm_device.cold+0xa8/0x298 [btrfs]
>  btrfs_ioctl+0x206c/0x22a0 [btrfs]
>  ksys_ioctl+0xe2/0x140
>  __x64_sys_ioctl+0x1e/0x29
>  do_syscall_64+0x96/0x150
>  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>  RIP: 0033:0x7f4047c6288b
>  ::
> 
> This is because, at the end of the seed device-delete, we try to remove
> the seed's devid sysfs entry. But for the seed devices under the sprout
> fs, we don't initialize the devid kobject yet. So this patch initializes
> the seed device devid kobject and the device link in the sysfs. This takes
> care of the Warning.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>


This patch is doing too many things at once. Split it so that:
1. You first add the new functions add_device/remove_device in 2
separate patches.
2. Switch btrfs_sysfs_add_devices_dir/btrfs_sysfs_remove_devices_dir to
ousing the newly added helpers, again in 2 separate patches

> ---
>  fs/btrfs/sysfs.c | 146 ++++++++++++++++++++++++++++++-----------------
>  1 file changed, 93 insertions(+), 53 deletions(-)
> 
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index 190e59152be5..9b5e58091fae 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -1149,45 +1149,48 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
>  	return 0;
>  }
>  
> -/* when one_device is NULL, it removes all device links */
> -
> -int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
> -		struct btrfs_device *one_device)
> +static void btrfs_sysfs_remove_device(struct btrfs_device *device)
>  {
>  	struct hd_struct *disk;
>  	struct kobject *disk_kobj;
> +	struct kobject *devices_kobj;
>  
> -	if (!fs_devices->devices_kobj)
> -		return -EINVAL;
> +	/*
> +	 * Seed fs_devices devices_kobj aren't used, fetch kobject from the
> +	 * fs_info::fs_devices.
> +	 */
> +	devices_kobj = device->fs_info->fs_devices->devices_kobj;
> +	ASSERT(devices_kobj);
>  
> -	if (one_device) {
> -		if (one_device->bdev) {
> -			disk = one_device->bdev->bd_part;
> -			disk_kobj = &part_to_dev(disk)->kobj;
> -			sysfs_remove_link(fs_devices->devices_kobj,
> -					  disk_kobj->name);
> -		}
> +	if (device->bdev) {
> +		disk = device->bdev->bd_part;
> +		disk_kobj = &part_to_dev(disk)->kobj;
> +		sysfs_remove_link(devices_kobj, disk_kobj->name);
> +	}
>  
> -		kobject_del(&one_device->devid_kobj);
> -		kobject_put(&one_device->devid_kobj);
> +	kobject_del(&device->devid_kobj);
> +	kobject_put(&device->devid_kobj);
>  
> -		wait_for_completion(&one_device->kobj_unregister);
> +	wait_for_completion(&device->kobj_unregister);
> +}
>  
> +/* when 2nd argument device is NULL, it removes all devices link */
> +int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
> +				   struct btrfs_device *device)
> +{
> +	struct btrfs_fs_devices *seed_fs_devices;
> +
> +	if (device) {
> +		btrfs_sysfs_remove_device(device);
>  		return 0;
>  	}
>  
> -	list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
> -
> -		if (one_device->bdev) {
> -			disk = one_device->bdev->bd_part;
> -			disk_kobj = &part_to_dev(disk)->kobj;
> -			sysfs_remove_link(fs_devices->devices_kobj,
> -					  disk_kobj->name);
> -		}
> -		kobject_del(&one_device->devid_kobj);
> -		kobject_put(&one_device->devid_kobj);
> +	list_for_each_entry(device, &fs_devices->devices, dev_list)
> +		btrfs_sysfs_remove_device(device);
>  
> -		wait_for_completion(&one_device->kobj_unregister);
> +	list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) {
> +		list_for_each_entry(device, &seed_fs_devices->devices, dev_list)
> +			btrfs_sysfs_remove_device(device);
>  	}
>  
>  	return 0;
> @@ -1271,44 +1274,81 @@ static struct kobj_type devid_ktype = {
>  	.release	= btrfs_release_devid_kobj,
>  };
>  
> +static int btrfs_sysfs_add_device(struct btrfs_device *device)
> +{
> +	int ret;
> +	struct kobject *devices_kobj;
> +        struct kobject *devinfo_kobj;
> +
> +	/*
> +	 * make sure we use the fs_info::fs_devices to fetch the kobjects
> +	 * even for the seed fs_devices
> +	 */
> +	devices_kobj = device->fs_devices->fs_info->fs_devices->devices_kobj;
> +	devinfo_kobj = device->fs_devices->fs_info->fs_devices->devinfo_kobj;
> +	ASSERT(devices_kobj);
> +	ASSERT(devinfo_kobj);
> +
> +	if (device->bdev) {
> +		struct hd_struct *disk;
> +		struct kobject *disk_kobj;
> +
> +		disk = device->bdev->bd_part;
> +		disk_kobj = &part_to_dev(disk)->kobj;
> +
> +		ret = sysfs_create_link(devices_kobj, disk_kobj,
> +					disk_kobj->name);
> +		if (ret) {
> +			btrfs_warn(device->fs_info,
> +			   "sysfs create device link failed %d devid %llu",
> +				   ret, device->devid);
> +			return ret;
> +		}
> +	}
> +
> +	init_completion(&device->kobj_unregister);
> +	ret = kobject_init_and_add(&device->devid_kobj, &devid_ktype,
> +				   devinfo_kobj, "%llu", device->devid);
> +	if (ret) {
> +		kobject_put(&device->devid_kobj);
> +		btrfs_warn(device->fs_info,
> +			   "sysfs devinfo init failed %d devid %llu",
> +			   ret, device->devid);
> +	}
> +
> +	return ret;
> +}
> +
>  int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
> -				struct btrfs_device *one_device)
> +				struct btrfs_device *device)
>  {
> -	int error = 0;
> -	struct btrfs_device *dev;
> +	int ret = 0;
>  	unsigned int nofs_flag;
> +	struct btrfs_fs_devices *seed_fs_devices;
>  
>  	nofs_flag = memalloc_nofs_save();
> -	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
>  
> -		if (one_device && one_device != dev)
> -			continue;
> +	if (device)
> +		return btrfs_sysfs_add_device(device);
>  
> -		if (dev->bdev) {
> -			struct hd_struct *disk;
> -			struct kobject *disk_kobj;
> -
> -			disk = dev->bdev->bd_part;
> -			disk_kobj = &part_to_dev(disk)->kobj;
> -
> -			error = sysfs_create_link(fs_devices->devices_kobj,
> -						  disk_kobj, disk_kobj->name);
> -			if (error)
> -				break;
> -		}
> +	list_for_each_entry(device, &fs_devices->devices, dev_list) {
> +		ret = btrfs_sysfs_add_device(device);
> +		if (ret)
> +			goto out;
> +	}
>  
> -		init_completion(&dev->kobj_unregister);
> -		error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
> -					     fs_devices->devinfo_kobj, "%llu",
> -					     dev->devid);
> -		if (error) {
> -			kobject_put(&dev->devid_kobj);
> -			break;
> +	list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) {
> +		list_for_each_entry(device, &seed_fs_devices->devices, dev_list) {
> +			ret = btrfs_sysfs_add_device(device);
> +			if (ret)
> +				goto out;
>  		}
>  	}
> +
> +out:
>  	memalloc_nofs_restore(nofs_flag);
>  
> -	return error;
> +	return ret;
>  }
>  
>  void btrfs_kobject_uevent(struct block_device *bdev, enum kobject_action action)
>
Anand Jain Aug. 31, 2020, noon UTC | #2
On 31/8/20 5:07 pm, Nikolay Borisov wrote:
> 
> 
> On 30.08.20 г. 17:40 ч., Anand Jain wrote:
>> The following test case leads to null kobject-being-freed error.
>>
>>   mount seed /mnt
>>   add sprout to /mnt
>>   umount /mnt
>>   mount sprout to /mnt
>>   delete seed
>>
>>   kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
>>   WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
>>   RIP: 0010:kobject_put+0x80/0x350
>>   ::
>>   Call Trace:
>>   btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
>>   btrfs_rm_device.cold+0xa8/0x298 [btrfs]
>>   btrfs_ioctl+0x206c/0x22a0 [btrfs]
>>   ksys_ioctl+0xe2/0x140
>>   __x64_sys_ioctl+0x1e/0x29
>>   do_syscall_64+0x96/0x150
>>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>   RIP: 0033:0x7f4047c6288b
>>   ::
>>
>> This is because, at the end of the seed device-delete, we try to remove
>> the seed's devid sysfs entry. But for the seed devices under the sprout
>> fs, we don't initialize the devid kobject yet. So this patch initializes
>> the seed device devid kobject and the device link in the sysfs. This takes
>> care of the Warning.
>>
>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> 



> This patch is doing too many things at once. Split it so that:
> 1. You first add the new functions add_device/remove_device in 2
> separate patches.
> 2. Switch btrfs_sysfs_add_devices_dir/btrfs_sysfs_remove_devices_dir to
> ousing the newly added helpers, again in 2 separate patches

yeah and

  3. Initialize seed devices devid kobject to fix the bug.

fixing.

-Anand

> 
>> ---
>>   fs/btrfs/sysfs.c | 146 ++++++++++++++++++++++++++++++-----------------
>>   1 file changed, 93 insertions(+), 53 deletions(-)
>>
>> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
>> index 190e59152be5..9b5e58091fae 100644
>> --- a/fs/btrfs/sysfs.c
>> +++ b/fs/btrfs/sysfs.c
>> @@ -1149,45 +1149,48 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
>>   	return 0;
>>   }
>>   
>> -/* when one_device is NULL, it removes all device links */
>> -
>> -int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
>> -		struct btrfs_device *one_device)
>> +static void btrfs_sysfs_remove_device(struct btrfs_device *device)
>>   {
>>   	struct hd_struct *disk;
>>   	struct kobject *disk_kobj;
>> +	struct kobject *devices_kobj;
>>   
>> -	if (!fs_devices->devices_kobj)
>> -		return -EINVAL;
>> +	/*
>> +	 * Seed fs_devices devices_kobj aren't used, fetch kobject from the
>> +	 * fs_info::fs_devices.
>> +	 */
>> +	devices_kobj = device->fs_info->fs_devices->devices_kobj;
>> +	ASSERT(devices_kobj);
>>   
>> -	if (one_device) {
>> -		if (one_device->bdev) {
>> -			disk = one_device->bdev->bd_part;
>> -			disk_kobj = &part_to_dev(disk)->kobj;
>> -			sysfs_remove_link(fs_devices->devices_kobj,
>> -					  disk_kobj->name);
>> -		}
>> +	if (device->bdev) {
>> +		disk = device->bdev->bd_part;
>> +		disk_kobj = &part_to_dev(disk)->kobj;
>> +		sysfs_remove_link(devices_kobj, disk_kobj->name);
>> +	}
>>   
>> -		kobject_del(&one_device->devid_kobj);
>> -		kobject_put(&one_device->devid_kobj);
>> +	kobject_del(&device->devid_kobj);
>> +	kobject_put(&device->devid_kobj);
>>   
>> -		wait_for_completion(&one_device->kobj_unregister);
>> +	wait_for_completion(&device->kobj_unregister);
>> +}
>>   
>> +/* when 2nd argument device is NULL, it removes all devices link */
>> +int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
>> +				   struct btrfs_device *device)
>> +{
>> +	struct btrfs_fs_devices *seed_fs_devices;
>> +
>> +	if (device) {
>> +		btrfs_sysfs_remove_device(device);
>>   		return 0;
>>   	}
>>   
>> -	list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
>> -
>> -		if (one_device->bdev) {
>> -			disk = one_device->bdev->bd_part;
>> -			disk_kobj = &part_to_dev(disk)->kobj;
>> -			sysfs_remove_link(fs_devices->devices_kobj,
>> -					  disk_kobj->name);
>> -		}
>> -		kobject_del(&one_device->devid_kobj);
>> -		kobject_put(&one_device->devid_kobj);
>> +	list_for_each_entry(device, &fs_devices->devices, dev_list)
>> +		btrfs_sysfs_remove_device(device);
>>   
>> -		wait_for_completion(&one_device->kobj_unregister);
>> +	list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) {
>> +		list_for_each_entry(device, &seed_fs_devices->devices, dev_list)
>> +			btrfs_sysfs_remove_device(device);
>>   	}
>>   
>>   	return 0;
>> @@ -1271,44 +1274,81 @@ static struct kobj_type devid_ktype = {
>>   	.release	= btrfs_release_devid_kobj,
>>   };
>>   
>> +static int btrfs_sysfs_add_device(struct btrfs_device *device)
>> +{
>> +	int ret;
>> +	struct kobject *devices_kobj;
>> +        struct kobject *devinfo_kobj;
>> +
>> +	/*
>> +	 * make sure we use the fs_info::fs_devices to fetch the kobjects
>> +	 * even for the seed fs_devices
>> +	 */
>> +	devices_kobj = device->fs_devices->fs_info->fs_devices->devices_kobj;
>> +	devinfo_kobj = device->fs_devices->fs_info->fs_devices->devinfo_kobj;
>> +	ASSERT(devices_kobj);
>> +	ASSERT(devinfo_kobj);
>> +
>> +	if (device->bdev) {
>> +		struct hd_struct *disk;
>> +		struct kobject *disk_kobj;
>> +
>> +		disk = device->bdev->bd_part;
>> +		disk_kobj = &part_to_dev(disk)->kobj;
>> +
>> +		ret = sysfs_create_link(devices_kobj, disk_kobj,
>> +					disk_kobj->name);
>> +		if (ret) {
>> +			btrfs_warn(device->fs_info,
>> +			   "sysfs create device link failed %d devid %llu",
>> +				   ret, device->devid);
>> +			return ret;
>> +		}
>> +	}
>> +
>> +	init_completion(&device->kobj_unregister);
>> +	ret = kobject_init_and_add(&device->devid_kobj, &devid_ktype,
>> +				   devinfo_kobj, "%llu", device->devid);
>> +	if (ret) {
>> +		kobject_put(&device->devid_kobj);
>> +		btrfs_warn(device->fs_info,
>> +			   "sysfs devinfo init failed %d devid %llu",
>> +			   ret, device->devid);
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>>   int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
>> -				struct btrfs_device *one_device)
>> +				struct btrfs_device *device)
>>   {
>> -	int error = 0;
>> -	struct btrfs_device *dev;
>> +	int ret = 0;
>>   	unsigned int nofs_flag;
>> +	struct btrfs_fs_devices *seed_fs_devices;
>>   
>>   	nofs_flag = memalloc_nofs_save();
>> -	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
>>   
>> -		if (one_device && one_device != dev)
>> -			continue;
>> +	if (device)
>> +		return btrfs_sysfs_add_device(device);
>>   
>> -		if (dev->bdev) {
>> -			struct hd_struct *disk;
>> -			struct kobject *disk_kobj;
>> -
>> -			disk = dev->bdev->bd_part;
>> -			disk_kobj = &part_to_dev(disk)->kobj;
>> -
>> -			error = sysfs_create_link(fs_devices->devices_kobj,
>> -						  disk_kobj, disk_kobj->name);
>> -			if (error)
>> -				break;
>> -		}
>> +	list_for_each_entry(device, &fs_devices->devices, dev_list) {
>> +		ret = btrfs_sysfs_add_device(device);
>> +		if (ret)
>> +			goto out;
>> +	}
>>   
>> -		init_completion(&dev->kobj_unregister);
>> -		error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
>> -					     fs_devices->devinfo_kobj, "%llu",
>> -					     dev->devid);
>> -		if (error) {
>> -			kobject_put(&dev->devid_kobj);
>> -			break;
>> +	list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) {
>> +		list_for_each_entry(device, &seed_fs_devices->devices, dev_list) {
>> +			ret = btrfs_sysfs_add_device(device);
>> +			if (ret)
>> +				goto out;
>>   		}
>>   	}
>> +
>> +out:
>>   	memalloc_nofs_restore(nofs_flag);
>>   
>> -	return error;
>> +	return ret;
>>   }
>>   
>>   void btrfs_kobject_uevent(struct block_device *bdev, enum kobject_action action)
>>
Josef Bacik Aug. 31, 2020, 4:21 p.m. UTC | #3
On 8/30/20 10:40 AM, Anand Jain wrote:
> The following test case leads to null kobject-being-freed error.
> 
>   mount seed /mnt
>   add sprout to /mnt
>   umount /mnt
>   mount sprout to /mnt
>   delete seed
> 
>   kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
>   WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
>   RIP: 0010:kobject_put+0x80/0x350
>   ::
>   Call Trace:
>   btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
>   btrfs_rm_device.cold+0xa8/0x298 [btrfs]
>   btrfs_ioctl+0x206c/0x22a0 [btrfs]
>   ksys_ioctl+0xe2/0x140
>   __x64_sys_ioctl+0x1e/0x29
>   do_syscall_64+0x96/0x150
>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>   RIP: 0033:0x7f4047c6288b
>   ::
> 
> This is because, at the end of the seed device-delete, we try to remove
> the seed's devid sysfs entry. But for the seed devices under the sprout
> fs, we don't initialize the devid kobject yet. So this patch initializes
> the seed device devid kobject and the device link in the sysfs. This takes
> care of the Warning.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---

Ok again, this will not work.

Mount a seed fs, you get fs_info->fs_devices pointed at the seed device, and 
fs_info->fs_devices->devices_kobj is what is initialized.

Now you sprout.

This does clone_fs_devices(fs_info->fs_devices), which doesn't copy over 
fs_fdevices->devices_kobj.  Now we take this clone device, and set 
fs_info->fsdevices to the cloned fs_devices, and we add the original fs_devices, 
which had the sysfs objects init'ed already, to fs_devices->seed_list.

It appears to me you are completely ignoring this aspect of sprout.  Maybe I'm 
missing something, but I've gone through this code twice now to see if what I 
think is happening is actually happening, and I'm convinced this is wrong.

If it's _not_ wrong, and I _am_ missing something, then you need to explain why 
I'm wrong, and then go back and fix what needs to be fixed to make this whole 
process more clear, and _then_ you can do this patch series.  Thanks,

Josef
Anand Jain Sept. 1, 2020, 4:16 p.m. UTC | #4
On 1/9/20 12:21 am, Josef Bacik wrote:
> On 8/30/20 10:40 AM, Anand Jain wrote:
>> The following test case leads to null kobject-being-freed error.
>>
>>   mount seed /mnt
>>   add sprout to /mnt
>>   umount /mnt
>>   mount sprout to /mnt
>>   delete seed
>>
>>   kobject: '(null)' (00000000dd2b87e4): is not initialized, yet 
>> kobject_put() is being called.
>>   WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
>>   RIP: 0010:kobject_put+0x80/0x350
>>   ::
>>   Call Trace:
>>   btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
>>   btrfs_rm_device.cold+0xa8/0x298 [btrfs]
>>   btrfs_ioctl+0x206c/0x22a0 [btrfs]
>>   ksys_ioctl+0xe2/0x140
>>   __x64_sys_ioctl+0x1e/0x29
>>   do_syscall_64+0x96/0x150
>>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>   RIP: 0033:0x7f4047c6288b
>>   ::
>>
>> This is because, at the end of the seed device-delete, we try to remove
>> the seed's devid sysfs entry. But for the seed devices under the sprout
>> fs, we don't initialize the devid kobject yet. So this patch initializes
>> the seed device devid kobject and the device link in the sysfs. This 
>> takes
>> care of the Warning.
>>
>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
>> ---
> 
> Ok again, this will not work.
> 

  This comment was answered here [1] before.
    [1]  https://patchwork.kernel.org/patch/11729425/


  Again let me verify with the boilerplate code [2], which dumps
  btrfs_fs_devices and btrfs_device structures along with its
  memory address.
    [2] https://github.com/asj/bbp.git

> Mount a seed fs, you get fs_info->fs_devices pointed at the seed device, 
> and fs_info->fs_devices->devices_kobj is what is initialized.

Right.

$ mount /dev/sda /btrfs
mount: /btrfs: WARNING: device write-protected, mounted read-only.

Note our fs_devices is at 000000005c8322d4, which is paired with sda.

$ cat /proc/fs/btrfs/devlist | egrep "fsid|addr|device:|kobj"
[fsid: 938a1ff2-f493-4a9a-9ed3-3d2ace2f0fb2]
	fs_devs_addr:		000000005c8322d4 <-- fs_devices
	fsid_kobj_state:	1
	fsid_kobj_insysfs:	1
	kobj_state:		1
	kobj_insysfs:		1
		dev_addr:	0000000017aba761
		device:		/dev/sda
		devid_kobj:	1


> Now you sprout.


The relevant code snippets are as below.

-----------------------------
2344 static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)

2361         seed_devices = alloc_fs_devices(NULL, NULL);

2371         old_devices = clone_fs_devices(fs_devices);

2377         list_add(&old_devices->fs_list, &fs_uuids);

2379         memcpy(seed_devices, fs_devices, sizeof(*seed_devices));

2386         list_splice_init_rcu(&fs_devices->devices, 
&seed_devices->devices,
2387                               synchronize_rcu);

2396         list_add_tail(&seed_devices->seed_list, 
&fs_devices->seed_list);

2398         generate_random_uuid(fs_devices->fsid);
-----------------------------


> This does clone_fs_devices(fs_info->fs_devices), which doesn't copy over 
> fs_fdevices->devices_kobj.

  Right. Line 2371.

>  Now we take this clone device, and set 
> fs_info->fsdevices to the cloned fs_devices,

  Hm. No we just add it to the fs_uuids. So that user doesn't have
  to scan again to mount the seed device at a different mount point.
  Line 2377.

> and we add the original 
> fs_devices, which had the sysfs objects init'ed already, to 
> fs_devices->seed_list.

  No. Original fs_devices still remains with fs_info->fs_devices.
  Which moves _devices_ _only_  to seed_devices::devices
  Line 2386.

  Lets verify.

  $ btrfs dev add -f /dev/sdb /btrfs

  Our fs_devices was at 000000005c8322d4.

  And after device add, our fs_devices 000000005c8322d4 is pairing with
  the RW device sdb.

  And the original sda is moved under seed_devices.

  Ignore clone_fs_devices its of no use to our conversation here.
  Which goes away after btrfs dev scan --forget as its not mounted
  anymore.

  As shown below.

$ cat /proc/fs/btrfs/devlist | egrep "fsid|addr|device:|kobj"
[fsid: 938a1ff2-f493-4a9a-9ed3-3d2ace2f0fb2]
	fs_devs_addr:		00000000f0c691d1 <-- clone_fs_devices
	fsid_kobj_state:	0
	fsid_kobj_insysfs:	0
	kobj_state:		null
	kobj_insysfs:		null
		dev_addr:	000000006e24ab09
		device:		/dev/sda
		devid_kobj:	null
[fsid: 91294469-9c0e-45c7-8dc7-af3134ba8cf4]
	fs_devs_addr:		000000005c8322d4 <-- fs_devices
	fsid_kobj_state:	1
	fsid_kobj_insysfs:	1
	kobj_state:		1
	kobj_insysfs:		1
		dev_addr:	0000000039f25fa1
		device:		/dev/sdb
		devid_kobj:	1
	[[seed_fsid: 938a1ff2-f493-4a9a-9ed3-3d2ace2f0fb2]]
		sprout_fsid:		91294469-9c0e-45c7-8dc7-af3134ba8cf4
		fs_devs_addr:		000000005e19db25 <-- seed_devices
		fsid_kobj_state:	1
		fsid_kobj_insysfs:	1
		kobj_state:		1
		kobj_insysfs:		1
			dev_addr:	0000000017aba761
			device:		/dev/sda
			devid_kobj:	1


Clone_fs_devices is freed after forget

$ btrfs dev scan --forget

$ cat /proc/fs/btrfs/devlist | egrep "fsid|addr|device:|kobj"
[fsid: 91294469-9c0e-45c7-8dc7-af3134ba8cf4]
	fs_devs_addr:		000000005c8322d4  <-- fs_devices
	fsid_kobj_state:	1
	fsid_kobj_insysfs:	1
	kobj_state:		1
	kobj_insysfs:		1
		dev_addr:	0000000039f25fa1
		device:		/dev/sdb
		devid_kobj:		1
	[[seed_fsid: 938a1ff2-f493-4a9a-9ed3-3d2ace2f0fb2]]
		sprout_fsid:		91294469-9c0e-45c7-8dc7-af3134ba8cf4
		fs_devs_addr:		000000005e19db25 <-- seed_devcies
		fsid_kobj_state:	1
		fsid_kobj_insysfs:	1
		kobj_state:		1
		kobj_insysfs:		1
			dev_addr:	0000000017aba761
			device:		/dev/sda
			devid_kobj:		1



HTH

Anand
diff mbox series

Patch

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 190e59152be5..9b5e58091fae 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1149,45 +1149,48 @@  int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-/* when one_device is NULL, it removes all device links */
-
-int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
-		struct btrfs_device *one_device)
+static void btrfs_sysfs_remove_device(struct btrfs_device *device)
 {
 	struct hd_struct *disk;
 	struct kobject *disk_kobj;
+	struct kobject *devices_kobj;
 
-	if (!fs_devices->devices_kobj)
-		return -EINVAL;
+	/*
+	 * Seed fs_devices devices_kobj aren't used, fetch kobject from the
+	 * fs_info::fs_devices.
+	 */
+	devices_kobj = device->fs_info->fs_devices->devices_kobj;
+	ASSERT(devices_kobj);
 
-	if (one_device) {
-		if (one_device->bdev) {
-			disk = one_device->bdev->bd_part;
-			disk_kobj = &part_to_dev(disk)->kobj;
-			sysfs_remove_link(fs_devices->devices_kobj,
-					  disk_kobj->name);
-		}
+	if (device->bdev) {
+		disk = device->bdev->bd_part;
+		disk_kobj = &part_to_dev(disk)->kobj;
+		sysfs_remove_link(devices_kobj, disk_kobj->name);
+	}
 
-		kobject_del(&one_device->devid_kobj);
-		kobject_put(&one_device->devid_kobj);
+	kobject_del(&device->devid_kobj);
+	kobject_put(&device->devid_kobj);
 
-		wait_for_completion(&one_device->kobj_unregister);
+	wait_for_completion(&device->kobj_unregister);
+}
 
+/* when 2nd argument device is NULL, it removes all devices link */
+int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
+				   struct btrfs_device *device)
+{
+	struct btrfs_fs_devices *seed_fs_devices;
+
+	if (device) {
+		btrfs_sysfs_remove_device(device);
 		return 0;
 	}
 
-	list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
-
-		if (one_device->bdev) {
-			disk = one_device->bdev->bd_part;
-			disk_kobj = &part_to_dev(disk)->kobj;
-			sysfs_remove_link(fs_devices->devices_kobj,
-					  disk_kobj->name);
-		}
-		kobject_del(&one_device->devid_kobj);
-		kobject_put(&one_device->devid_kobj);
+	list_for_each_entry(device, &fs_devices->devices, dev_list)
+		btrfs_sysfs_remove_device(device);
 
-		wait_for_completion(&one_device->kobj_unregister);
+	list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) {
+		list_for_each_entry(device, &seed_fs_devices->devices, dev_list)
+			btrfs_sysfs_remove_device(device);
 	}
 
 	return 0;
@@ -1271,44 +1274,81 @@  static struct kobj_type devid_ktype = {
 	.release	= btrfs_release_devid_kobj,
 };
 
+static int btrfs_sysfs_add_device(struct btrfs_device *device)
+{
+	int ret;
+	struct kobject *devices_kobj;
+        struct kobject *devinfo_kobj;
+
+	/*
+	 * make sure we use the fs_info::fs_devices to fetch the kobjects
+	 * even for the seed fs_devices
+	 */
+	devices_kobj = device->fs_devices->fs_info->fs_devices->devices_kobj;
+	devinfo_kobj = device->fs_devices->fs_info->fs_devices->devinfo_kobj;
+	ASSERT(devices_kobj);
+	ASSERT(devinfo_kobj);
+
+	if (device->bdev) {
+		struct hd_struct *disk;
+		struct kobject *disk_kobj;
+
+		disk = device->bdev->bd_part;
+		disk_kobj = &part_to_dev(disk)->kobj;
+
+		ret = sysfs_create_link(devices_kobj, disk_kobj,
+					disk_kobj->name);
+		if (ret) {
+			btrfs_warn(device->fs_info,
+			   "sysfs create device link failed %d devid %llu",
+				   ret, device->devid);
+			return ret;
+		}
+	}
+
+	init_completion(&device->kobj_unregister);
+	ret = kobject_init_and_add(&device->devid_kobj, &devid_ktype,
+				   devinfo_kobj, "%llu", device->devid);
+	if (ret) {
+		kobject_put(&device->devid_kobj);
+		btrfs_warn(device->fs_info,
+			   "sysfs devinfo init failed %d devid %llu",
+			   ret, device->devid);
+	}
+
+	return ret;
+}
+
 int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
-				struct btrfs_device *one_device)
+				struct btrfs_device *device)
 {
-	int error = 0;
-	struct btrfs_device *dev;
+	int ret = 0;
 	unsigned int nofs_flag;
+	struct btrfs_fs_devices *seed_fs_devices;
 
 	nofs_flag = memalloc_nofs_save();
-	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
 
-		if (one_device && one_device != dev)
-			continue;
+	if (device)
+		return btrfs_sysfs_add_device(device);
 
-		if (dev->bdev) {
-			struct hd_struct *disk;
-			struct kobject *disk_kobj;
-
-			disk = dev->bdev->bd_part;
-			disk_kobj = &part_to_dev(disk)->kobj;
-
-			error = sysfs_create_link(fs_devices->devices_kobj,
-						  disk_kobj, disk_kobj->name);
-			if (error)
-				break;
-		}
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		ret = btrfs_sysfs_add_device(device);
+		if (ret)
+			goto out;
+	}
 
-		init_completion(&dev->kobj_unregister);
-		error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
-					     fs_devices->devinfo_kobj, "%llu",
-					     dev->devid);
-		if (error) {
-			kobject_put(&dev->devid_kobj);
-			break;
+	list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) {
+		list_for_each_entry(device, &seed_fs_devices->devices, dev_list) {
+			ret = btrfs_sysfs_add_device(device);
+			if (ret)
+				goto out;
 		}
 	}
+
+out:
 	memalloc_nofs_restore(nofs_flag);
 
-	return error;
+	return ret;
 }
 
 void btrfs_kobject_uevent(struct block_device *bdev, enum kobject_action action)