Message ID | 558d3ae7ad53788c05810ae452cece7036316fb2.1667831845.git.anand.jain@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: move device->name rcu alloc and assign to btrfs_alloc_device() | expand |
On 2022/11/7 23:07, Anand Jain wrote: > There is a repeating code section in the parent function after calling > btrfs_alloc_device(), as below. > > name = rcu_string_strdup(path, GFP_...); > if (!name) { > btrfs_free_device(device); > return ERR_PTR(-ENOMEM); > } > rcu_assign_pointer(device->name, name); > > Except in add_missing_dev() for obvious reasons. > > This patch consolidates that repeating code into the btrfs_alloc_device() > itself so that the parent function doesn't have to duplicate code. > This consolidation also helps to review issues regarding rcu lock > violation with device->name. > > Parent function device_list_add() and add_missing_dev() uses GFP_NOFS for > the alloc, whereas the rest of the parent function uses GFP_KERNEL, so > bring the NOFS alloc context using memalloc_nofs_save() in the function > device_list_add() and add_missing_dev() is already doing it. I'm wondering do we really need to use RCU for device list? My understanding of this situation is, btrfs has very limited way to modifiy device list (device add/remove/replace), while most of our operations are read-only for device list. Can we just go regular semaphore and get rid of the RCU scheme completely? Thanks, Qu > > Signed-off-by: Anand Jain <anand.jain@oracle.com> > --- > fs/btrfs/dev-replace.c | 10 +------ > fs/btrfs/volumes.c | 64 ++++++++++++++++++------------------------ > fs/btrfs/volumes.h | 4 +-- > 3 files changed, 30 insertions(+), 48 deletions(-) > > diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c > index 84af2010fae2..9c4a8649a0f4 100644 > --- a/fs/btrfs/dev-replace.c > +++ b/fs/btrfs/dev-replace.c > @@ -249,7 +249,6 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, > struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; > struct btrfs_device *device; > struct block_device *bdev; > - struct rcu_string *name; > u64 devid = BTRFS_DEV_REPLACE_DEVID; > int ret = 0; > > @@ -293,19 +292,12 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, > } > > > - device = btrfs_alloc_device(NULL, &devid, NULL); > + device = btrfs_alloc_device(NULL, &devid, NULL, device_path); > if (IS_ERR(device)) { > ret = PTR_ERR(device); > goto error; > } > > - name = rcu_string_strdup(device_path, GFP_KERNEL); > - if (!name) { > - btrfs_free_device(device); > - ret = -ENOMEM; > - goto error; > - } > - rcu_assign_pointer(device->name, name); > ret = lookup_bdev(device_path, &device->devt); > if (ret) > goto error; > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index 27fa43f5c4f4..7a9e6c40c053 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -845,26 +845,23 @@ static noinline struct btrfs_device *device_list_add(const char *path, > } > > if (!device) { > + unsigned int nofs_flag; > + > if (fs_devices->opened) { > mutex_unlock(&fs_devices->device_list_mutex); > return ERR_PTR(-EBUSY); > } > > + nofs_flag = memalloc_nofs_save(); > device = btrfs_alloc_device(NULL, &devid, > - disk_super->dev_item.uuid); > + disk_super->dev_item.uuid, path); > + memalloc_nofs_restore(nofs_flag); > if (IS_ERR(device)) { > mutex_unlock(&fs_devices->device_list_mutex); > /* we can safely leave the fs_devices entry around */ > return device; > } > > - name = rcu_string_strdup(path, GFP_NOFS); > - if (!name) { > - btrfs_free_device(device); > - mutex_unlock(&fs_devices->device_list_mutex); > - return ERR_PTR(-ENOMEM); > - } > - rcu_assign_pointer(device->name, name); > device->devt = path_devt; > > list_add_rcu(&device->dev_list, &fs_devices->devices); > @@ -997,30 +994,18 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) > fs_devices->total_devices = orig->total_devices; > > list_for_each_entry(orig_dev, &orig->devices, dev_list) { > - struct rcu_string *name; > + const char *dev_path = NULL; > + > + if (orig_dev->name) > + dev_path = orig_dev->name->str; > > device = btrfs_alloc_device(NULL, &orig_dev->devid, > - orig_dev->uuid); > + orig_dev->uuid, dev_path); > if (IS_ERR(device)) { > ret = PTR_ERR(device); > goto error; > } > > - /* > - * This is ok to do without rcu read locked because we hold the > - * uuid mutex so nothing we touch in here is going to disappear. > - */ > - if (orig_dev->name) { > - name = rcu_string_strdup(orig_dev->name->str, > - GFP_KERNEL); > - if (!name) { > - btrfs_free_device(device); > - ret = -ENOMEM; > - goto error; > - } > - rcu_assign_pointer(device->name, name); > - } > - > list_add(&device->dev_list, &fs_devices->devices); > device->fs_devices = fs_devices; > fs_devices->num_devices++; > @@ -2592,7 +2577,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path > struct btrfs_device *device; > struct block_device *bdev; > struct super_block *sb = fs_info->sb; > - struct rcu_string *name; > struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; > struct btrfs_fs_devices *seed_devices; > u64 orig_super_total_bytes; > @@ -2633,20 +2617,13 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path > } > rcu_read_unlock(); > > - device = btrfs_alloc_device(fs_info, NULL, NULL); > + device = btrfs_alloc_device(fs_info, NULL, NULL, device_path); > if (IS_ERR(device)) { > /* we can safely leave the fs_devices entry around */ > ret = PTR_ERR(device); > goto error; > } > > - name = rcu_string_strdup(device_path, GFP_KERNEL); > - if (!name) { > - ret = -ENOMEM; > - goto error_free_device; > - } > - rcu_assign_pointer(device->name, name); > - > device->fs_info = fs_info; > device->bdev = bdev; > ret = lookup_bdev(device_path, &device->devt); > @@ -6986,8 +6963,9 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices, > * always do NOFS because we use it in a lot of other GFP_KERNEL safe > * places. > */ > + > nofs_flag = memalloc_nofs_save(); > - device = btrfs_alloc_device(NULL, &devid, dev_uuid); > + device = btrfs_alloc_device(NULL, &devid, dev_uuid, NULL); > memalloc_nofs_restore(nofs_flag); > if (IS_ERR(device)) > return device; > @@ -7011,14 +6989,15 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices, > * is generated. > * @uuid: a pointer to UUID for this device. If NULL a new UUID > * is generated. > + * @path: a pointer to device path if available, NULL otherwise. > * > * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR() > * on error. Returned struct is not linked onto any lists and must be > * destroyed with btrfs_free_device. > */ > struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, > - const u64 *devid, > - const u8 *uuid) > + const u64 *devid, const u8 *uuid, > + const char *path) > { > struct btrfs_device *dev; > u64 tmp; > @@ -7057,6 +7036,17 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, > else > generate_random_uuid(dev->uuid); > > + if (path) { > + struct rcu_string *name; > + > + name = rcu_string_strdup(path, GFP_KERNEL); > + if (!name) { > + btrfs_free_device(dev); > + return ERR_PTR(-ENOMEM); > + } > + rcu_assign_pointer(dev->name, name); > + } > + > return dev; > } > > diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h > index 6c7b5cf2de79..07eb5aecbe81 100644 > --- a/fs/btrfs/volumes.h > +++ b/fs/btrfs/volumes.h > @@ -649,8 +649,8 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, > struct btrfs_dev_lookup_args *args, > const char *path); > struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, > - const u64 *devid, > - const u8 *uuid); > + const u64 *devid, const u8 *uuid, > + const char *path); > void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args); > void btrfs_free_device(struct btrfs_device *device); > int btrfs_rm_device(struct btrfs_fs_info *fs_info,
On Tue, Nov 08, 2022 at 07:45:08AM +0800, Qu Wenruo wrote: > > > On 2022/11/7 23:07, Anand Jain wrote: > > There is a repeating code section in the parent function after calling > > btrfs_alloc_device(), as below. > > > > name = rcu_string_strdup(path, GFP_...); > > if (!name) { > > btrfs_free_device(device); > > return ERR_PTR(-ENOMEM); > > } > > rcu_assign_pointer(device->name, name); > > > > Except in add_missing_dev() for obvious reasons. > > > > This patch consolidates that repeating code into the btrfs_alloc_device() > > itself so that the parent function doesn't have to duplicate code. > > This consolidation also helps to review issues regarding rcu lock > > violation with device->name. > > > > Parent function device_list_add() and add_missing_dev() uses GFP_NOFS for > > the alloc, whereas the rest of the parent function uses GFP_KERNEL, so > > bring the NOFS alloc context using memalloc_nofs_save() in the function > > device_list_add() and add_missing_dev() is already doing it. > > I'm wondering do we really need to use RCU for device list? > > My understanding of this situation is, btrfs has very limited way to > modifiy device list (device add/remove/replace), while most of our > operations are read-only for device list. > > Can we just go regular semaphore and get rid of the RCU scheme completely? We can't get rid of RCU completely right now but as I read your question, it may be possible to unify the read/write accees to one locking primitive. I'd like to get rid of the RCU and have tried but some sort of locking is needed around device name as it can be chnaged and read independently from ioctl and many printks. But, the device list and RCU is a bit different. The RCU access to device list is used in the ioctls that basically iterate devices and read something. Here RCU is the most lightweight we can do, next is read side of a read-write primitive. I haven't looked closer for the device list, but the rwlock or semaphore could increase lock contention in case there are multiple readers and writers. So, an ioctl that reads device info (read) can block any write access (super block commit). Here the RCU allows to access the device list for read while super block commit can proceed. There are maybe more examples of the read-write interactions but I'd consider blocking super block write as a critical one so I pick it as first example. What we have now is is quite complex but works for all scenarios: device scan, device add/remove, all vs super block commit, mounted or unmounted, device name change at any time (vs printks). We could simplify that, but at the cost of increased contention "correct but slow", or "complex but you can hammer ioctls and your filesystem will write data in time". I don't want to discourage you or anybody from finding optimizations, small in terms of performance or plain code, like this patch. We might hit dead ends before finding some viable. I thought the RCU for device name can be replaced by som some locking scheme but it does not improve things, RCU (and the _rcu printk helpers) is still better.
On 08/11/2022 08:12, David Sterba wrote: > On Tue, Nov 08, 2022 at 07:45:08AM +0800, Qu Wenruo wrote: >> >> >> On 2022/11/7 23:07, Anand Jain wrote: >>> There is a repeating code section in the parent function after calling >>> btrfs_alloc_device(), as below. >>> >>> name = rcu_string_strdup(path, GFP_...); >>> if (!name) { >>> btrfs_free_device(device); >>> return ERR_PTR(-ENOMEM); >>> } >>> rcu_assign_pointer(device->name, name); >>> >>> Except in add_missing_dev() for obvious reasons. >>> >>> This patch consolidates that repeating code into the btrfs_alloc_device() >>> itself so that the parent function doesn't have to duplicate code. >>> This consolidation also helps to review issues regarding rcu lock >>> violation with device->name. >>> >>> Parent function device_list_add() and add_missing_dev() uses GFP_NOFS for >>> the alloc, whereas the rest of the parent function uses GFP_KERNEL, so >>> bring the NOFS alloc context using memalloc_nofs_save() in the function >>> device_list_add() and add_missing_dev() is already doing it. >> >> I'm wondering do we really need to use RCU for device list? >> >> My understanding of this situation is, btrfs has very limited way to >> modifiy device list (device add/remove/replace), while most of our >> operations are read-only for device list. >> >> Can we just go regular semaphore and get rid of the RCU scheme completely? > > We can't get rid of RCU completely right now but as I read your > question, it may be possible to unify the read/write accees to one > locking primitive. I'd like to get rid of the RCU and have tried but > some sort of locking is needed around device name as it can be chnaged > and read independently from ioctl and many printks. > > But, the device list and RCU is a bit different. The RCU access to > device list is used in the ioctls that basically iterate devices and > read something. Here RCU is the most lightweight we can do, next is read > side of a read-write primitive. > > I haven't looked closer for the device list, but the rwlock or semaphore > could increase lock contention in case there are multiple readers and > writers. So, an ioctl that reads device info (read) can block any write > access (super block commit). Here the RCU allows to access the device > list for read while super block commit can proceed. There are maybe more > examples of the read-write interactions but I'd consider blocking super > block write as a critical one so I pick it as first example. > > What we have now is is quite complex but works for all scenarios: device > scan, device add/remove, all vs super block commit, mounted or > unmounted, device name change at any time (vs printks). We could > simplify that, but at the cost of increased contention "correct but > slow", or "complex but you can hammer ioctls and your filesystem will > write data in time". > > I don't want to discourage you or anybody from finding optimizations, > small in terms of performance or plain code, like this patch. We might > hit dead ends before finding some viable. I thought the RCU for device > name can be replaced by som some locking scheme but it does not improve > things, RCU (and the _rcu printk helpers) is still better. Thanks for your thoughts on this. I don't see any better alternative other than RCU. Maybe if we consolidate RCU access, it will help us to stabilise it. Let's see.
On Mon, Nov 07, 2022 at 11:07:17PM +0800, Anand Jain wrote: > There is a repeating code section in the parent function after calling > btrfs_alloc_device(), as below. > > name = rcu_string_strdup(path, GFP_...); > if (!name) { > btrfs_free_device(device); > return ERR_PTR(-ENOMEM); > } > rcu_assign_pointer(device->name, name); > > Except in add_missing_dev() for obvious reasons. > > This patch consolidates that repeating code into the btrfs_alloc_device() > itself so that the parent function doesn't have to duplicate code. > This consolidation also helps to review issues regarding rcu lock > violation with device->name. > > Parent function device_list_add() and add_missing_dev() uses GFP_NOFS for > the alloc, whereas the rest of the parent function uses GFP_KERNEL, so > bring the NOFS alloc context using memalloc_nofs_save() in the function > device_list_add() and add_missing_dev() is already doing it. > > Signed-off-by: Anand Jain <anand.jain@oracle.com> Added to misc-next, thanks.
Hi Anand, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on kdave/for-next] [also build test WARNING on linus/master v6.1-rc4] [cannot apply to next-20221108] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Anand-Jain/btrfs-move-device-name-rcu-alloc-and-assign-to-btrfs_alloc_device/20221107-230806 base: https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git for-next patch link: https://lore.kernel.org/r/558d3ae7ad53788c05810ae452cece7036316fb2.1667831845.git.anand.jain%40oracle.com patch subject: [PATCH] btrfs: move device->name rcu alloc and assign to btrfs_alloc_device() config: riscv-randconfig-s043-20221108 compiler: riscv64-linux-gcc (GCC) 12.1.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # apt-get install sparse # sparse version: v0.6.4-39-gce1a6720-dirty # https://github.com/intel-lab-lkp/linux/commit/084f70deaf67118927d4f0494ff5b3988eb77146 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Anand-Jain/btrfs-move-device-name-rcu-alloc-and-assign-to-btrfs_alloc_device/20221107-230806 git checkout 084f70deaf67118927d4f0494ff5b3988eb77146 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=riscv SHELL=/bin/bash fs/btrfs/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot <lkp@intel.com> sparse warnings: (new ones prefixed by >>) WARNING: invalid argument to '-march': '_zihintpause' fs/btrfs/volumes.c:409:31: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct rcu_string *str @@ got struct rcu_string [noderef] __rcu *name @@ fs/btrfs/volumes.c:409:31: sparse: expected struct rcu_string *str fs/btrfs/volumes.c:409:31: sparse: got struct rcu_string [noderef] __rcu *name fs/btrfs/volumes.c:617:43: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected char const *device_path @@ got char [noderef] __rcu * @@ fs/btrfs/volumes.c:617:43: sparse: expected char const *device_path fs/btrfs/volumes.c:617:43: sparse: got char [noderef] __rcu * fs/btrfs/volumes.c:884:50: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected char const * @@ got char [noderef] __rcu * @@ fs/btrfs/volumes.c:884:50: sparse: expected char const * fs/btrfs/volumes.c:884:50: sparse: got char [noderef] __rcu * fs/btrfs/volumes.c:954:39: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct rcu_string *str @@ got struct rcu_string [noderef] __rcu *name @@ fs/btrfs/volumes.c:954:39: sparse: expected struct rcu_string *str fs/btrfs/volumes.c:954:39: sparse: got struct rcu_string [noderef] __rcu *name >> fs/btrfs/volumes.c:1000:34: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected char const *dev_path @@ got char [noderef] __rcu * @@ fs/btrfs/volumes.c:1000:34: sparse: expected char const *dev_path fs/btrfs/volumes.c:1000:34: sparse: got char [noderef] __rcu * fs/btrfs/volumes.c:2182:49: sparse: sparse: incorrect type in argument 3 (different address spaces) @@ expected char const *device_path @@ got char [noderef] __rcu * @@ fs/btrfs/volumes.c:2182:49: sparse: expected char const *device_path fs/btrfs/volumes.c:2182:49: sparse: got char [noderef] __rcu * fs/btrfs/volumes.c:2297:41: sparse: sparse: incorrect type in argument 3 (different address spaces) @@ expected char const *device_path @@ got char [noderef] __rcu * @@ fs/btrfs/volumes.c:2297:41: sparse: expected char const *device_path fs/btrfs/volumes.c:2297:41: sparse: got char [noderef] __rcu * vim +1000 fs/btrfs/volumes.c 980 981 static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) 982 { 983 struct btrfs_fs_devices *fs_devices; 984 struct btrfs_device *device; 985 struct btrfs_device *orig_dev; 986 int ret = 0; 987 988 lockdep_assert_held(&uuid_mutex); 989 990 fs_devices = alloc_fs_devices(orig->fsid, NULL); 991 if (IS_ERR(fs_devices)) 992 return fs_devices; 993 994 fs_devices->total_devices = orig->total_devices; 995 996 list_for_each_entry(orig_dev, &orig->devices, dev_list) { 997 const char *dev_path = NULL; 998 999 if (orig_dev->name) > 1000 dev_path = orig_dev->name->str; 1001 1002 device = btrfs_alloc_device(NULL, &orig_dev->devid, 1003 orig_dev->uuid, dev_path); 1004 if (IS_ERR(device)) { 1005 ret = PTR_ERR(device); 1006 goto error; 1007 } 1008 1009 list_add(&device->dev_list, &fs_devices->devices); 1010 device->fs_devices = fs_devices; 1011 fs_devices->num_devices++; 1012 } 1013 return fs_devices; 1014 error: 1015 free_fs_devices(fs_devices); 1016 return ERR_PTR(ret); 1017 } 1018
On 11/9/22 13:25, kernel test robot wrote: > Hi Anand, > > Thank you for the patch! Perhaps something to improve: > > [auto build test WARNING on kdave/for-next] > [also build test WARNING on linus/master v6.1-rc4] > [cannot apply to next-20221108] > [If your patch is applied to the wrong git tree, kindly drop us a note. > And when submitting patch, we suggest to use '--base' as documented in > https://git-scm.com/docs/git-format-patch#_base_tree_information] > > url: https://github.com/intel-lab-lkp/linux/commits/Anand-Jain/btrfs-move-device-name-rcu-alloc-and-assign-to-btrfs_alloc_device/20221107-230806 > base: https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git for-next > patch link: https://lore.kernel.org/r/558d3ae7ad53788c05810ae452cece7036316fb2.1667831845.git.anand.jain%40oracle.com > patch subject: [PATCH] btrfs: move device->name rcu alloc and assign to btrfs_alloc_device() > config: riscv-randconfig-s043-20221108 > compiler: riscv64-linux-gcc (GCC) 12.1.0 > reproduce: > wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross > chmod +x ~/bin/make.cross > # apt-get install sparse > # sparse version: v0.6.4-39-gce1a6720-dirty > # https://github.com/intel-lab-lkp/linux/commit/084f70deaf67118927d4f0494ff5b3988eb77146 > git remote add linux-review https://github.com/intel-lab-lkp/linux > git fetch --no-tags linux-review Anand-Jain/btrfs-move-device-name-rcu-alloc-and-assign-to-btrfs_alloc_device/20221107-230806 > git checkout 084f70deaf67118927d4f0494ff5b3988eb77146 > # save the config file > mkdir build_dir && cp config build_dir/.config > COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=riscv SHELL=/bin/bash fs/btrfs/ > > If you fix the issue, kindly add following tag where applicable > | Reported-by: kernel test robot <lkp@intel.com> > > sparse warnings: (new ones prefixed by >>) > WARNING: invalid argument to '-march': '_zihintpause' > fs/btrfs/volumes.c:409:31: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct rcu_string *str @@ got struct rcu_string [noderef] __rcu *name @@ > fs/btrfs/volumes.c:409:31: sparse: expected struct rcu_string *str > fs/btrfs/volumes.c:409:31: sparse: got struct rcu_string [noderef] __rcu *name > fs/btrfs/volumes.c:617:43: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected char const *device_path @@ got char [noderef] __rcu * @@ > fs/btrfs/volumes.c:617:43: sparse: expected char const *device_path > fs/btrfs/volumes.c:617:43: sparse: got char [noderef] __rcu * > fs/btrfs/volumes.c:884:50: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected char const * @@ got char [noderef] __rcu * @@ > fs/btrfs/volumes.c:884:50: sparse: expected char const * > fs/btrfs/volumes.c:884:50: sparse: got char [noderef] __rcu * > fs/btrfs/volumes.c:954:39: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct rcu_string *str @@ got struct rcu_string [noderef] __rcu *name @@ > fs/btrfs/volumes.c:954:39: sparse: expected struct rcu_string *str > fs/btrfs/volumes.c:954:39: sparse: got struct rcu_string [noderef] __rcu *name >>> fs/btrfs/volumes.c:1000:34: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected char const *dev_path @@ got char [noderef] __rcu * @@ > fs/btrfs/volumes.c:1000:34: sparse: expected char const *dev_path > fs/btrfs/volumes.c:1000:34: sparse: got char [noderef] __rcu * > fs/btrfs/volumes.c:2182:49: sparse: sparse: incorrect type in argument 3 (different address spaces) @@ expected char const *device_path @@ got char [noderef] __rcu * @@ > fs/btrfs/volumes.c:2182:49: sparse: expected char const *device_path > fs/btrfs/volumes.c:2182:49: sparse: got char [noderef] __rcu * > fs/btrfs/volumes.c:2297:41: sparse: sparse: incorrect type in argument 3 (different address spaces) @@ expected char const *device_path @@ got char [noderef] __rcu * @@ > fs/btrfs/volumes.c:2297:41: sparse: expected char const *device_path > fs/btrfs/volumes.c:2297:41: sparse: got char [noderef] __rcu * > > vim +1000 fs/btrfs/volumes.c > > 980 > 981 static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) > 982 { > 983 struct btrfs_fs_devices *fs_devices; > 984 struct btrfs_device *device; > 985 struct btrfs_device *orig_dev; > 986 int ret = 0; > 987 > 988 lockdep_assert_held(&uuid_mutex); > 989 > 990 fs_devices = alloc_fs_devices(orig->fsid, NULL); > 991 if (IS_ERR(fs_devices)) > 992 return fs_devices; > 993 > 994 fs_devices->total_devices = orig->total_devices; > 995 > 996 list_for_each_entry(orig_dev, &orig->devices, dev_list) { > 997 const char *dev_path = NULL; > 998 > 999 if (orig_dev->name) >> 1000 dev_path = orig_dev->name->str; David, Access like orig_dev->name->str isn't introduced in this patch. We have to restore the original comment about it. /* * This is ok to do without rcu read locked because we hold the * uuid mutex so nothing we touch in here is going to disappear. */ Thanks, Anand > 1001 > 1002 device = btrfs_alloc_device(NULL, &orig_dev->devid, > 1003 orig_dev->uuid, dev_path); > 1004 if (IS_ERR(device)) { > 1005 ret = PTR_ERR(device); > 1006 goto error; > 1007 } > 1008 > 1009 list_add(&device->dev_list, &fs_devices->devices); > 1010 device->fs_devices = fs_devices; > 1011 fs_devices->num_devices++; > 1012 } > 1013 return fs_devices; > 1014 error: > 1015 free_fs_devices(fs_devices); > 1016 return ERR_PTR(ret); > 1017 } > 1018 >
On Wed, Nov 09, 2022 at 05:21:12PM +0800, Anand Jain wrote: > On 11/9/22 13:25, kernel test robot wrote: > > Hi Anand, > > > > Thank you for the patch! Perhaps something to improve: > > > > vim +1000 fs/btrfs/volumes.c > > > > 980 > > 981 static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) > > 982 { > > 983 struct btrfs_fs_devices *fs_devices; > > 984 struct btrfs_device *device; > > 985 struct btrfs_device *orig_dev; > > 986 int ret = 0; > > 987 > > 988 lockdep_assert_held(&uuid_mutex); > > 989 > > 990 fs_devices = alloc_fs_devices(orig->fsid, NULL); > > 991 if (IS_ERR(fs_devices)) > > 992 return fs_devices; > > 993 > > 994 fs_devices->total_devices = orig->total_devices; > > 995 > > 996 list_for_each_entry(orig_dev, &orig->devices, dev_list) { > > 997 const char *dev_path = NULL; > > 998 > > 999 if (orig_dev->name) > >> 1000 dev_path = orig_dev->name->str; > > > David, > > Access like orig_dev->name->str isn't introduced in this patch. > We have to restore the original comment about it. > > /* > * This is ok to do without rcu read locked because we > hold the > * uuid mutex so nothing we touch in here is going to > disappear. > */ Yeah and I kept the comment when applying the patch.
On 11/9/22 18:28, David Sterba wrote: > On Wed, Nov 09, 2022 at 05:21:12PM +0800, Anand Jain wrote: >> On 11/9/22 13:25, kernel test robot wrote: >>> Hi Anand, >>> >>> Thank you for the patch! Perhaps something to improve: >>> >>> vim +1000 fs/btrfs/volumes.c >>> >>> 980 >>> 981 static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) >>> 982 { >>> 983 struct btrfs_fs_devices *fs_devices; >>> 984 struct btrfs_device *device; >>> 985 struct btrfs_device *orig_dev; >>> 986 int ret = 0; >>> 987 >>> 988 lockdep_assert_held(&uuid_mutex); >>> 989 >>> 990 fs_devices = alloc_fs_devices(orig->fsid, NULL); >>> 991 if (IS_ERR(fs_devices)) >>> 992 return fs_devices; >>> 993 >>> 994 fs_devices->total_devices = orig->total_devices; >>> 995 >>> 996 list_for_each_entry(orig_dev, &orig->devices, dev_list) { >>> 997 const char *dev_path = NULL; >>> 998 >>> 999 if (orig_dev->name) >>>> 1000 dev_path = orig_dev->name->str; >> >> >> David, >> >> Access like orig_dev->name->str isn't introduced in this patch. >> We have to restore the original comment about it. >> >> /* >> * This is ok to do without rcu read locked because we >> hold the >> * uuid mutex so nothing we touch in here is going to >> disappear. >> */ > > Yeah and I kept the comment when applying the patch. Ah. Thanks!
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 84af2010fae2..9c4a8649a0f4 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -249,7 +249,6 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; struct block_device *bdev; - struct rcu_string *name; u64 devid = BTRFS_DEV_REPLACE_DEVID; int ret = 0; @@ -293,19 +292,12 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, } - device = btrfs_alloc_device(NULL, &devid, NULL); + device = btrfs_alloc_device(NULL, &devid, NULL, device_path); if (IS_ERR(device)) { ret = PTR_ERR(device); goto error; } - name = rcu_string_strdup(device_path, GFP_KERNEL); - if (!name) { - btrfs_free_device(device); - ret = -ENOMEM; - goto error; - } - rcu_assign_pointer(device->name, name); ret = lookup_bdev(device_path, &device->devt); if (ret) goto error; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 27fa43f5c4f4..7a9e6c40c053 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -845,26 +845,23 @@ static noinline struct btrfs_device *device_list_add(const char *path, } if (!device) { + unsigned int nofs_flag; + if (fs_devices->opened) { mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-EBUSY); } + nofs_flag = memalloc_nofs_save(); device = btrfs_alloc_device(NULL, &devid, - disk_super->dev_item.uuid); + disk_super->dev_item.uuid, path); + memalloc_nofs_restore(nofs_flag); if (IS_ERR(device)) { mutex_unlock(&fs_devices->device_list_mutex); /* we can safely leave the fs_devices entry around */ return device; } - name = rcu_string_strdup(path, GFP_NOFS); - if (!name) { - btrfs_free_device(device); - mutex_unlock(&fs_devices->device_list_mutex); - return ERR_PTR(-ENOMEM); - } - rcu_assign_pointer(device->name, name); device->devt = path_devt; list_add_rcu(&device->dev_list, &fs_devices->devices); @@ -997,30 +994,18 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) fs_devices->total_devices = orig->total_devices; list_for_each_entry(orig_dev, &orig->devices, dev_list) { - struct rcu_string *name; + const char *dev_path = NULL; + + if (orig_dev->name) + dev_path = orig_dev->name->str; device = btrfs_alloc_device(NULL, &orig_dev->devid, - orig_dev->uuid); + orig_dev->uuid, dev_path); if (IS_ERR(device)) { ret = PTR_ERR(device); goto error; } - /* - * This is ok to do without rcu read locked because we hold the - * uuid mutex so nothing we touch in here is going to disappear. - */ - if (orig_dev->name) { - name = rcu_string_strdup(orig_dev->name->str, - GFP_KERNEL); - if (!name) { - btrfs_free_device(device); - ret = -ENOMEM; - goto error; - } - rcu_assign_pointer(device->name, name); - } - list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; fs_devices->num_devices++; @@ -2592,7 +2577,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path struct btrfs_device *device; struct block_device *bdev; struct super_block *sb = fs_info->sb; - struct rcu_string *name; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_fs_devices *seed_devices; u64 orig_super_total_bytes; @@ -2633,20 +2617,13 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path } rcu_read_unlock(); - device = btrfs_alloc_device(fs_info, NULL, NULL); + device = btrfs_alloc_device(fs_info, NULL, NULL, device_path); if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ ret = PTR_ERR(device); goto error; } - name = rcu_string_strdup(device_path, GFP_KERNEL); - if (!name) { - ret = -ENOMEM; - goto error_free_device; - } - rcu_assign_pointer(device->name, name); - device->fs_info = fs_info; device->bdev = bdev; ret = lookup_bdev(device_path, &device->devt); @@ -6986,8 +6963,9 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices, * always do NOFS because we use it in a lot of other GFP_KERNEL safe * places. */ + nofs_flag = memalloc_nofs_save(); - device = btrfs_alloc_device(NULL, &devid, dev_uuid); + device = btrfs_alloc_device(NULL, &devid, dev_uuid, NULL); memalloc_nofs_restore(nofs_flag); if (IS_ERR(device)) return device; @@ -7011,14 +6989,15 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices, * is generated. * @uuid: a pointer to UUID for this device. If NULL a new UUID * is generated. + * @path: a pointer to device path if available, NULL otherwise. * * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR() * on error. Returned struct is not linked onto any lists and must be * destroyed with btrfs_free_device. */ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, - const u64 *devid, - const u8 *uuid) + const u64 *devid, const u8 *uuid, + const char *path) { struct btrfs_device *dev; u64 tmp; @@ -7057,6 +7036,17 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, else generate_random_uuid(dev->uuid); + if (path) { + struct rcu_string *name; + + name = rcu_string_strdup(path, GFP_KERNEL); + if (!name) { + btrfs_free_device(dev); + return ERR_PTR(-ENOMEM); + } + rcu_assign_pointer(dev->name, name); + } + return dev; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6c7b5cf2de79..07eb5aecbe81 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -649,8 +649,8 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, const char *path); struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, - const u64 *devid, - const u8 *uuid); + const u64 *devid, const u8 *uuid, + const char *path); void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args); void btrfs_free_device(struct btrfs_device *device); int btrfs_rm_device(struct btrfs_fs_info *fs_info,
There is a repeating code section in the parent function after calling btrfs_alloc_device(), as below. name = rcu_string_strdup(path, GFP_...); if (!name) { btrfs_free_device(device); return ERR_PTR(-ENOMEM); } rcu_assign_pointer(device->name, name); Except in add_missing_dev() for obvious reasons. This patch consolidates that repeating code into the btrfs_alloc_device() itself so that the parent function doesn't have to duplicate code. This consolidation also helps to review issues regarding rcu lock violation with device->name. Parent function device_list_add() and add_missing_dev() uses GFP_NOFS for the alloc, whereas the rest of the parent function uses GFP_KERNEL, so bring the NOFS alloc context using memalloc_nofs_save() in the function device_list_add() and add_missing_dev() is already doing it. Signed-off-by: Anand Jain <anand.jain@oracle.com> --- fs/btrfs/dev-replace.c | 10 +------ fs/btrfs/volumes.c | 64 ++++++++++++++++++------------------------ fs/btrfs/volumes.h | 4 +-- 3 files changed, 30 insertions(+), 48 deletions(-)