diff mbox

btrfs: remove empty fs_devices to prevent memory runout

Message ID 1416449713-23561-1-git-send-email-guihc.fnst@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gui Hecheng Nov. 20, 2014, 2:15 a.m. UTC
There is a global list @fs_uuids to keep @fs_devices object
for each created btrfs. But when a btrfs becomes "empty"
(all devices belong to it are gone), its @fs_devices remains
in @fs_uuids list until module exit.
If we keeps mkfs.btrfs on the same device again and again,
all empty @fs_devices produced are sure to eat up our memory.
So this case has better to be prevented.

I think that each time we setup btrfs on that device, we should
check whether we are stealing some device from another btrfs
seen before. To faciliate the search procedure, we could insert
all @btrfs_device in a rb_root, one @btrfs_device per each physical
device, with @bdev->bd_dev as key. Each time device stealing happens,
we should replace the corresponding @btrfs_device in the rb_root with
an up-to-date version.
If the stolen device is the last device in its @fs_devices,
then we have an empty btrfs to be deleted.

Actually there are 3 ways to steal devices and lead to empty btrfs
        1. mkfs, with -f option
        2. device add, with -f option
        3. device replace, with -f option
We should act under these cases.

Moreover, if there are seed devices, then it is asured that
the devices in cloned @fs_devices are not treated as valid devices.

Signed-off-by: Gui Hecheng <guihc.fnst@cn.fujitsu.com>
---
 fs/btrfs/super.c   |   1 +
 fs/btrfs/volumes.c | 181 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 fs/btrfs/volumes.h |   6 ++
 3 files changed, 172 insertions(+), 16 deletions(-)

Comments

Anand Jain Nov. 21, 2014, 11:03 a.m. UTC | #1
Hi Gui,

  Thanks for attempting this.

  There was this patch previously attempted for the same problem,
  which I had to nack..
    [PATCH 1/2] btrfs: device list could grow infinite

  I haven't seen your fix in full yet, But looks like you are using
  dev_t to suffice the problem of identifying unique device. wonder
  how does this would this react when device goes through the disappear
  and reappear events ?

Thanks, Anand


On 11/20/14 10:15 AM, Gui Hecheng wrote:
> There is a global list @fs_uuids to keep @fs_devices object
> for each created btrfs. But when a btrfs becomes "empty"
> (all devices belong to it are gone), its @fs_devices remains
> in @fs_uuids list until module exit.
> If we keeps mkfs.btrfs on the same device again and again,
> all empty @fs_devices produced are sure to eat up our memory.
> So this case has better to be prevented.
>
> I think that each time we setup btrfs on that device, we should
> check whether we are stealing some device from another btrfs
> seen before. To faciliate the search procedure, we could insert
> all @btrfs_device in a rb_root, one @btrfs_device per each physical
> device, with @bdev->bd_dev as key. Each time device stealing happens,
> we should replace the corresponding @btrfs_device in the rb_root with
> an up-to-date version.
> If the stolen device is the last device in its @fs_devices,
> then we have an empty btrfs to be deleted.
>
> Actually there are 3 ways to steal devices and lead to empty btrfs
>          1. mkfs, with -f option
>          2. device add, with -f option
>          3. device replace, with -f option
> We should act under these cases.
>
> Moreover, if there are seed devices, then it is asured that
> the devices in cloned @fs_devices are not treated as valid devices.
>
> Signed-off-by: Gui Hecheng <guihc.fnst@cn.fujitsu.com>
> ---
>   fs/btrfs/super.c   |   1 +
>   fs/btrfs/volumes.c | 181 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>   fs/btrfs/volumes.h |   6 ++
>   3 files changed, 172 insertions(+), 16 deletions(-)
>
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 54bd91e..ee09a56 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -2154,6 +2154,7 @@ static void __exit exit_btrfs_fs(void)
>   	btrfs_end_io_wq_exit();
>   	unregister_filesystem(&btrfs_fs_type);
>   	btrfs_exit_sysfs();
> +	btrfs_cleanup_valid_dev_root();
>   	btrfs_cleanup_fs_uuids();
>   	btrfs_exit_compress();
>   	btrfs_hash_exit();
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 0192051..ba86b1b 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -27,6 +27,7 @@
>   #include <linux/kthread.h>
>   #include <linux/raid/pq.h>
>   #include <linux/semaphore.h>
> +#include <linux/rbtree.h>
>   #include <asm/div64.h>
>   #include "ctree.h"
>   #include "extent_map.h"
> @@ -52,6 +53,120 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
>
>   DEFINE_MUTEX(uuid_mutex);
>   static LIST_HEAD(fs_uuids);
> +static struct rb_root valid_dev_root = RB_ROOT;
> +
> +static struct btrfs_device *insert_valid_device(struct btrfs_device *new_dev)
> +{
> +	struct rb_node **p;
> +	struct rb_node *parent;
> +	struct rb_node *new;
> +	struct btrfs_device *old_dev;
> +
> +	WARN_ON(!mutex_is_locked(&uuid_mutex));
> +
> +	parent = NULL;
> +	new = &new_dev->rb_node;
> +
> +	p = &valid_dev_root.rb_node;
> +	while (*p) {
> +		parent = *p;
> +		old_dev = rb_entry(parent, struct btrfs_device, rb_node);
> +
> +		if (new_dev->devnum < old_dev->devnum)
> +			p = &parent->rb_left;
> +		else if (new_dev->devnum > old_dev->devnum)
> +			p = &parent->rb_right;
> +		else {
> +			rb_replace_node(parent, new, &valid_dev_root);
> +			RB_CLEAR_NODE(parent);
> +
> +			goto out;
> +		}
> +	}
> +
> +	old_dev = NULL;
> +	rb_link_node(new, parent, p);
> +	rb_insert_color(new, &valid_dev_root);
> +
> +out:
> +	return old_dev;
> +}
> +
> +static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
> +{
> +	struct btrfs_device *device;
> +	WARN_ON(fs_devices->opened);
> +	while (!list_empty(&fs_devices->devices)) {
> +		device = list_entry(fs_devices->devices.next,
> +				    struct btrfs_device, dev_list);
> +		list_del(&device->dev_list);
> +		rcu_string_free(device->name);
> +		kfree(device);
> +	}
> +	kfree(fs_devices);
> +}
> +
> +static void remove_empty_fs_if_need(struct btrfs_fs_devices *old_fs)
> +{
> +	struct btrfs_fs_devices *seed_fs;
> +
> +	if (!list_empty(&old_fs->devices))
> +		return;
> +
> +	list_del(&old_fs->list);
> +
> +	/* free the seed clones */
> +	seed_fs = old_fs->seed;
> +	free_fs_devices(old_fs);
> +	while (seed_fs) {
> +		old_fs = seed_fs;
> +		seed_fs = seed_fs->seed;
> +		free_fs_devices(old_fs);
> +	}
> +
> +}
> +
> +static void replace_invalid_device(struct btrfs_device *new_dev)
> +{
> +	struct btrfs_device *invalid_dev;
> +	struct btrfs_fs_devices *old_fs;
> +
> +	WARN_ON(!mutex_is_locked(&uuid_mutex));
> +
> +	invalid_dev = insert_valid_device(new_dev);
> +	if (!invalid_dev)
> +		return;
> +
> +	old_fs = invalid_dev->fs_devices;
> +	mutex_lock(&old_fs->device_list_mutex);
> +	list_del(&invalid_dev->dev_list);
> +	rcu_string_free(invalid_dev->name);
> +	kfree(invalid_dev);
> +	mutex_unlock(&old_fs->device_list_mutex);
> +
> +	remove_empty_fs_if_need(old_fs);
> +}
> +
> +static void remove_valid_device(struct btrfs_device *old_dev)
> +{
> +	WARN_ON(!mutex_is_locked(&uuid_mutex));
> +
> +	if (!RB_EMPTY_NODE(&old_dev->rb_node)) {
> +		rb_erase(&old_dev->rb_node, &valid_dev_root);
> +		RB_CLEAR_NODE(&old_dev->rb_node);
> +	}
> +}
> +
> +void btrfs_cleanup_valid_dev_root(void)
> +{
> +	struct rb_node *rb_node;
> +
> +	rb_node = rb_first(&valid_dev_root);
> +	while (rb_node) {
> +		rb_erase(rb_node, &valid_dev_root);
> +		rb_node = rb_first(&valid_dev_root);
> +	}
> +}
>
>   static void lock_chunks(struct btrfs_root *root)
>   {
> @@ -106,20 +221,6 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
>   	return fs_devs;
>   }
>
> -static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
> -{
> -	struct btrfs_device *device;
> -	WARN_ON(fs_devices->opened);
> -	while (!list_empty(&fs_devices->devices)) {
> -		device = list_entry(fs_devices->devices.next,
> -				    struct btrfs_device, dev_list);
> -		list_del(&device->dev_list);
> -		rcu_string_free(device->name);
> -		kfree(device);
> -	}
> -	kfree(fs_devices);
> -}
> -
>   static void btrfs_kobject_uevent(struct block_device *bdev,
>   				 enum kobject_action action)
>   {
> @@ -165,6 +266,8 @@ static struct btrfs_device *__alloc_device(void)
>   	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
>   	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
>
> +	RB_CLEAR_NODE(&dev->rb_node);
> +
>   	return dev;
>   }
>
> @@ -461,7 +564,7 @@ static void pending_bios_fn(struct btrfs_work *work)
>    * < 0 - error
>    */
>   static noinline int device_list_add(const char *path,
> -			   struct btrfs_super_block *disk_super,
> +			   struct btrfs_super_block *disk_super, dev_t devnum,
>   			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
>   {
>   	struct btrfs_device *device;
> @@ -509,6 +612,8 @@ static noinline int device_list_add(const char *path,
>
>   		ret = 1;
>   		device->fs_devices = fs_devices;
> +		device->devnum = devnum;
> +		replace_invalid_device(device);
>   	} else if (!device->name || strcmp(device->name->str, path)) {
>   		/*
>   		 * When FS is already mounted.
> @@ -609,6 +714,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
>
>   		list_add(&device->dev_list, &fs_devices->devices);
>   		device->fs_devices = fs_devices;
> +		device->devnum = orig_dev->devnum;
>   		fs_devices->num_devices++;
>   	}
>   	mutex_unlock(&orig->device_list_mutex);
> @@ -619,6 +725,15 @@ error:
>   	return ERR_PTR(-ENOMEM);
>   }
>
> +/*
> + * If @fs_devices is not in global list @fs_uuids,
> + * then it is a cloned btrfs_fs_devices for seeding
> + */
> +static int is_cloned_fs_devices(struct btrfs_fs_devices *fs_devices)
> +{
> +	return list_empty(&fs_devices->list);
> +}
> +
>   void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
>   			       struct btrfs_fs_devices *fs_devices, int step)
>   {
> @@ -665,6 +780,10 @@ again:
>   				fs_devices->rw_devices--;
>   		}
>   		list_del_init(&device->dev_list);
> +
> +		/* skip cloned fs_devices which act as seed devices*/
> +		if (!is_cloned_fs_devices(fs_devices))
> +			remove_valid_device(device);
>   		fs_devices->num_devices--;
>   		rcu_string_free(device->name);
>   		kfree(device);
> @@ -740,6 +859,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
>
>   		list_replace_rcu(&device->dev_list, &new_device->dev_list);
>   		new_device->fs_devices = device->fs_devices;
> +		new_device->devnum = device->devnum;
> +
> +		/* skip cloned fs_devices which act as seed devices*/
> +		if (!is_cloned_fs_devices(device->fs_devices))
> +			insert_valid_device(new_device);
>
>   		call_rcu(&device->rcu, free_device);
>   	}
> @@ -952,7 +1076,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
>   	transid = btrfs_super_generation(disk_super);
>   	total_devices = btrfs_super_num_devices(disk_super);
>
> -	ret = device_list_add(path, disk_super, devid, fs_devices_ret);
> +	ret = device_list_add(path, disk_super, bdev->bd_dev,
> +				devid, fs_devices_ret);
>   	if (ret > 0) {
>   		if (disk_super->label[0]) {
>   			if (disk_super->label[BTRFS_LABEL_SIZE - 1])
> @@ -1682,6 +1807,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
>   	 */
>
>   	cur_devices = device->fs_devices;
> +	remove_valid_device(device);
>   	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
>   	list_del_rcu(&device->dev_list);
>
> @@ -1829,6 +1955,8 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
>
>   	if (srcdev->bdev)
>   		fs_devices->open_devices--;
> +
> +	remove_valid_device(srcdev);
>   }
>
>   void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
> @@ -1883,6 +2011,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
>   	if (tgtdev->bdev == fs_info->fs_devices->latest_bdev)
>   		fs_info->fs_devices->latest_bdev = next_device->bdev;
>   	list_del_rcu(&tgtdev->dev_list);
> +	remove_valid_device(tgtdev);
>
>   	call_rcu(&tgtdev->rcu, free_device);
>
> @@ -1975,12 +2104,22 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
>   		return PTR_ERR(old_devices);
>   	}
>
> +	/*
> +	 * Here @old_devices represent the fs_devices that will be linked
> +	 * in the fs_uuids, and devices in it should be valid.
> +	 * All devices in @fs_devices which will be moved into @seed_devices
> +	 * and they just act as clones. So replace those clones which sit
> +	 * in @dev_map_root for now with valid devices in @old_devices.
> +	 */
> +	list_for_each_entry(device, &old_devices->devices, dev_list)
> +		insert_valid_device(device);
>   	list_add(&old_devices->list, &fs_uuids);
>
>   	memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
>   	seed_devices->opened = 1;
>   	INIT_LIST_HEAD(&seed_devices->devices);
>   	INIT_LIST_HEAD(&seed_devices->alloc_list);
> +	INIT_LIST_HEAD(&seed_devices->list);
>   	mutex_init(&seed_devices->device_list_mutex);
>
>   	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
> @@ -2178,6 +2317,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
>   	}
>
>   	device->fs_devices = root->fs_info->fs_devices;
> +	device->devnum = bdev->bd_dev;
>
>   	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
>   	lock_chunks(root);
> @@ -2277,6 +2417,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
>   		ret = btrfs_commit_transaction(trans, root);
>   	}
>
> +	mutex_lock(&uuid_mutex);
> +	replace_invalid_device(device);
> +	mutex_unlock(&uuid_mutex);
> +
>   	/* Update ctime/mtime for libblkid */
>   	update_dev_time(device_path);
>   	return ret;
> @@ -2378,11 +2522,16 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
>   	device->dev_stats_valid = 1;
>   	set_blocksize(device->bdev, 4096);
>   	device->fs_devices = fs_info->fs_devices;
> +	device->devnum = bdev->bd_dev;
>   	list_add(&device->dev_list, &fs_info->fs_devices->devices);
>   	fs_info->fs_devices->num_devices++;
>   	fs_info->fs_devices->open_devices++;
>   	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
>
> +	mutex_lock(&uuid_mutex);
> +	replace_invalid_device(device);
> +	mutex_unlock(&uuid_mutex);
> +
>   	*device_out = device;
>   	return ret;
>
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 4cc00e6..7b57cc6 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -80,6 +80,11 @@ struct btrfs_device {
>   	seqcount_t data_seqcount;
>   #endif
>
> +	struct rb_node rb_node;
> +
> +	/* node key in valid_dev_root */
> +	dev_t devnum;
> +
>   	/* the internal btrfs device id */
>   	u64 devid;
>
> @@ -418,6 +423,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
>   					const u64 *devid,
>   					const u8 *uuid);
>   int btrfs_rm_device(struct btrfs_root *root, char *device_path);
> +void btrfs_cleanup_valid_dev_root(void);
>   void btrfs_cleanup_fs_uuids(void);
>   int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
>   int btrfs_grow_device(struct btrfs_trans_handle *trans,
>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gui Hecheng Nov. 24, 2014, 6:15 a.m. UTC | #2
On Fri, 2014-11-21 at 19:03 +0800, Anand Jain wrote:
> Hi Gui,
> 
>   Thanks for attempting this.
> 
>   There was this patch previously attempted for the same problem,
>   which I had to nack..
>     [PATCH 1/2] btrfs: device list could grow infinite
> 
>   I haven't seen your fix in full yet, But looks like you are using
>   dev_t to suffice the problem of identifying unique device. wonder
>   how does this would this react when device goes through the disappear
>   and reappear events ?
> 
> Thanks, Anand
> 

Hi Anand,

Thanks for your comments.
Definitly we are dealing with the same problem, but in different ways.
And as you see there are cases relating to replace & seed to handle.
With my patch, the device "stealing" problem which causes possible
memory leak has gone.

Oh, yes, the disappear & reappear problem is really a piece missing.
Disappear & reappear causes dev_t to change for the same physical
device, and there is certain to be "dead" btrfs_device objects that can
not be found by anybody in my rb_tree.
In my mind, check the rb_tree for "dead"s every time it changes is
neccessary to handle the problem.
But there should be more efficient ways to go, any suggestions?

Thanks,
Gui

> On 11/20/14 10:15 AM, Gui Hecheng wrote:
> > There is a global list @fs_uuids to keep @fs_devices object
> > for each created btrfs. But when a btrfs becomes "empty"
> > (all devices belong to it are gone), its @fs_devices remains
> > in @fs_uuids list until module exit.
> > If we keeps mkfs.btrfs on the same device again and again,
> > all empty @fs_devices produced are sure to eat up our memory.
> > So this case has better to be prevented.
> >
> > I think that each time we setup btrfs on that device, we should
> > check whether we are stealing some device from another btrfs
> > seen before. To faciliate the search procedure, we could insert
> > all @btrfs_device in a rb_root, one @btrfs_device per each physical
> > device, with @bdev->bd_dev as key. Each time device stealing happens,
> > we should replace the corresponding @btrfs_device in the rb_root with
> > an up-to-date version.
> > If the stolen device is the last device in its @fs_devices,
> > then we have an empty btrfs to be deleted.
> >
> > Actually there are 3 ways to steal devices and lead to empty btrfs
> >          1. mkfs, with -f option
> >          2. device add, with -f option
> >          3. device replace, with -f option
> > We should act under these cases.
> >
> > Moreover, if there are seed devices, then it is asured that
> > the devices in cloned @fs_devices are not treated as valid devices.
> >
> > Signed-off-by: Gui Hecheng <guihc.fnst@cn.fujitsu.com>
> > ---
> >   fs/btrfs/super.c   |   1 +
> >   fs/btrfs/volumes.c | 181 ++++++++++++++++++++++++++++++++++++++++++++++++-----
> >   fs/btrfs/volumes.h |   6 ++
> >   3 files changed, 172 insertions(+), 16 deletions(-)
> >
> > diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> > index 54bd91e..ee09a56 100644
> > --- a/fs/btrfs/super.c
> > +++ b/fs/btrfs/super.c
> > @@ -2154,6 +2154,7 @@ static void __exit exit_btrfs_fs(void)
> >   	btrfs_end_io_wq_exit();
> >   	unregister_filesystem(&btrfs_fs_type);
> >   	btrfs_exit_sysfs();
> > +	btrfs_cleanup_valid_dev_root();
> >   	btrfs_cleanup_fs_uuids();
> >   	btrfs_exit_compress();
> >   	btrfs_hash_exit();
> > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> > index 0192051..ba86b1b 100644
> > --- a/fs/btrfs/volumes.c
> > +++ b/fs/btrfs/volumes.c
> > @@ -27,6 +27,7 @@
> >   #include <linux/kthread.h>
> >   #include <linux/raid/pq.h>
> >   #include <linux/semaphore.h>
> > +#include <linux/rbtree.h>
> >   #include <asm/div64.h>
> >   #include "ctree.h"
> >   #include "extent_map.h"
> > @@ -52,6 +53,120 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
> >
> >   DEFINE_MUTEX(uuid_mutex);
> >   static LIST_HEAD(fs_uuids);
> > +static struct rb_root valid_dev_root = RB_ROOT;
> > +
> > +static struct btrfs_device *insert_valid_device(struct btrfs_device *new_dev)
> > +{
> > +	struct rb_node **p;
> > +	struct rb_node *parent;
> > +	struct rb_node *new;
> > +	struct btrfs_device *old_dev;
> > +
> > +	WARN_ON(!mutex_is_locked(&uuid_mutex));
> > +
> > +	parent = NULL;
> > +	new = &new_dev->rb_node;
> > +
> > +	p = &valid_dev_root.rb_node;
> > +	while (*p) {
> > +		parent = *p;
> > +		old_dev = rb_entry(parent, struct btrfs_device, rb_node);
> > +
> > +		if (new_dev->devnum < old_dev->devnum)
> > +			p = &parent->rb_left;
> > +		else if (new_dev->devnum > old_dev->devnum)
> > +			p = &parent->rb_right;
> > +		else {
> > +			rb_replace_node(parent, new, &valid_dev_root);
> > +			RB_CLEAR_NODE(parent);
> > +
> > +			goto out;
> > +		}
> > +	}
> > +
> > +	old_dev = NULL;
> > +	rb_link_node(new, parent, p);
> > +	rb_insert_color(new, &valid_dev_root);
> > +
> > +out:
> > +	return old_dev;
> > +}
> > +
> > +static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
> > +{
> > +	struct btrfs_device *device;
> > +	WARN_ON(fs_devices->opened);
> > +	while (!list_empty(&fs_devices->devices)) {
> > +		device = list_entry(fs_devices->devices.next,
> > +				    struct btrfs_device, dev_list);
> > +		list_del(&device->dev_list);
> > +		rcu_string_free(device->name);
> > +		kfree(device);
> > +	}
> > +	kfree(fs_devices);
> > +}
> > +
> > +static void remove_empty_fs_if_need(struct btrfs_fs_devices *old_fs)
> > +{
> > +	struct btrfs_fs_devices *seed_fs;
> > +
> > +	if (!list_empty(&old_fs->devices))
> > +		return;
> > +
> > +	list_del(&old_fs->list);
> > +
> > +	/* free the seed clones */
> > +	seed_fs = old_fs->seed;
> > +	free_fs_devices(old_fs);
> > +	while (seed_fs) {
> > +		old_fs = seed_fs;
> > +		seed_fs = seed_fs->seed;
> > +		free_fs_devices(old_fs);
> > +	}
> > +
> > +}
> > +
> > +static void replace_invalid_device(struct btrfs_device *new_dev)
> > +{
> > +	struct btrfs_device *invalid_dev;
> > +	struct btrfs_fs_devices *old_fs;
> > +
> > +	WARN_ON(!mutex_is_locked(&uuid_mutex));
> > +
> > +	invalid_dev = insert_valid_device(new_dev);
> > +	if (!invalid_dev)
> > +		return;
> > +
> > +	old_fs = invalid_dev->fs_devices;
> > +	mutex_lock(&old_fs->device_list_mutex);
> > +	list_del(&invalid_dev->dev_list);
> > +	rcu_string_free(invalid_dev->name);
> > +	kfree(invalid_dev);
> > +	mutex_unlock(&old_fs->device_list_mutex);
> > +
> > +	remove_empty_fs_if_need(old_fs);
> > +}
> > +
> > +static void remove_valid_device(struct btrfs_device *old_dev)
> > +{
> > +	WARN_ON(!mutex_is_locked(&uuid_mutex));
> > +
> > +	if (!RB_EMPTY_NODE(&old_dev->rb_node)) {
> > +		rb_erase(&old_dev->rb_node, &valid_dev_root);
> > +		RB_CLEAR_NODE(&old_dev->rb_node);
> > +	}
> > +}
> > +
> > +void btrfs_cleanup_valid_dev_root(void)
> > +{
> > +	struct rb_node *rb_node;
> > +
> > +	rb_node = rb_first(&valid_dev_root);
> > +	while (rb_node) {
> > +		rb_erase(rb_node, &valid_dev_root);
> > +		rb_node = rb_first(&valid_dev_root);
> > +	}
> > +}
> >
> >   static void lock_chunks(struct btrfs_root *root)
> >   {
> > @@ -106,20 +221,6 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
> >   	return fs_devs;
> >   }
> >
> > -static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
> > -{
> > -	struct btrfs_device *device;
> > -	WARN_ON(fs_devices->opened);
> > -	while (!list_empty(&fs_devices->devices)) {
> > -		device = list_entry(fs_devices->devices.next,
> > -				    struct btrfs_device, dev_list);
> > -		list_del(&device->dev_list);
> > -		rcu_string_free(device->name);
> > -		kfree(device);
> > -	}
> > -	kfree(fs_devices);
> > -}
> > -
> >   static void btrfs_kobject_uevent(struct block_device *bdev,
> >   				 enum kobject_action action)
> >   {
> > @@ -165,6 +266,8 @@ static struct btrfs_device *__alloc_device(void)
> >   	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
> >   	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
> >
> > +	RB_CLEAR_NODE(&dev->rb_node);
> > +
> >   	return dev;
> >   }
> >
> > @@ -461,7 +564,7 @@ static void pending_bios_fn(struct btrfs_work *work)
> >    * < 0 - error
> >    */
> >   static noinline int device_list_add(const char *path,
> > -			   struct btrfs_super_block *disk_super,
> > +			   struct btrfs_super_block *disk_super, dev_t devnum,
> >   			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
> >   {
> >   	struct btrfs_device *device;
> > @@ -509,6 +612,8 @@ static noinline int device_list_add(const char *path,
> >
> >   		ret = 1;
> >   		device->fs_devices = fs_devices;
> > +		device->devnum = devnum;
> > +		replace_invalid_device(device);
> >   	} else if (!device->name || strcmp(device->name->str, path)) {
> >   		/*
> >   		 * When FS is already mounted.
> > @@ -609,6 +714,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
> >
> >   		list_add(&device->dev_list, &fs_devices->devices);
> >   		device->fs_devices = fs_devices;
> > +		device->devnum = orig_dev->devnum;
> >   		fs_devices->num_devices++;
> >   	}
> >   	mutex_unlock(&orig->device_list_mutex);
> > @@ -619,6 +725,15 @@ error:
> >   	return ERR_PTR(-ENOMEM);
> >   }
> >
> > +/*
> > + * If @fs_devices is not in global list @fs_uuids,
> > + * then it is a cloned btrfs_fs_devices for seeding
> > + */
> > +static int is_cloned_fs_devices(struct btrfs_fs_devices *fs_devices)
> > +{
> > +	return list_empty(&fs_devices->list);
> > +}
> > +
> >   void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
> >   			       struct btrfs_fs_devices *fs_devices, int step)
> >   {
> > @@ -665,6 +780,10 @@ again:
> >   				fs_devices->rw_devices--;
> >   		}
> >   		list_del_init(&device->dev_list);
> > +
> > +		/* skip cloned fs_devices which act as seed devices*/
> > +		if (!is_cloned_fs_devices(fs_devices))
> > +			remove_valid_device(device);
> >   		fs_devices->num_devices--;
> >   		rcu_string_free(device->name);
> >   		kfree(device);
> > @@ -740,6 +859,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
> >
> >   		list_replace_rcu(&device->dev_list, &new_device->dev_list);
> >   		new_device->fs_devices = device->fs_devices;
> > +		new_device->devnum = device->devnum;
> > +
> > +		/* skip cloned fs_devices which act as seed devices*/
> > +		if (!is_cloned_fs_devices(device->fs_devices))
> > +			insert_valid_device(new_device);
> >
> >   		call_rcu(&device->rcu, free_device);
> >   	}
> > @@ -952,7 +1076,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
> >   	transid = btrfs_super_generation(disk_super);
> >   	total_devices = btrfs_super_num_devices(disk_super);
> >
> > -	ret = device_list_add(path, disk_super, devid, fs_devices_ret);
> > +	ret = device_list_add(path, disk_super, bdev->bd_dev,
> > +				devid, fs_devices_ret);
> >   	if (ret > 0) {
> >   		if (disk_super->label[0]) {
> >   			if (disk_super->label[BTRFS_LABEL_SIZE - 1])
> > @@ -1682,6 +1807,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
> >   	 */
> >
> >   	cur_devices = device->fs_devices;
> > +	remove_valid_device(device);
> >   	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
> >   	list_del_rcu(&device->dev_list);
> >
> > @@ -1829,6 +1955,8 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
> >
> >   	if (srcdev->bdev)
> >   		fs_devices->open_devices--;
> > +
> > +	remove_valid_device(srcdev);
> >   }
> >
> >   void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
> > @@ -1883,6 +2011,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
> >   	if (tgtdev->bdev == fs_info->fs_devices->latest_bdev)
> >   		fs_info->fs_devices->latest_bdev = next_device->bdev;
> >   	list_del_rcu(&tgtdev->dev_list);
> > +	remove_valid_device(tgtdev);
> >
> >   	call_rcu(&tgtdev->rcu, free_device);
> >
> > @@ -1975,12 +2104,22 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
> >   		return PTR_ERR(old_devices);
> >   	}
> >
> > +	/*
> > +	 * Here @old_devices represent the fs_devices that will be linked
> > +	 * in the fs_uuids, and devices in it should be valid.
> > +	 * All devices in @fs_devices which will be moved into @seed_devices
> > +	 * and they just act as clones. So replace those clones which sit
> > +	 * in @dev_map_root for now with valid devices in @old_devices.
> > +	 */
> > +	list_for_each_entry(device, &old_devices->devices, dev_list)
> > +		insert_valid_device(device);
> >   	list_add(&old_devices->list, &fs_uuids);
> >
> >   	memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
> >   	seed_devices->opened = 1;
> >   	INIT_LIST_HEAD(&seed_devices->devices);
> >   	INIT_LIST_HEAD(&seed_devices->alloc_list);
> > +	INIT_LIST_HEAD(&seed_devices->list);
> >   	mutex_init(&seed_devices->device_list_mutex);
> >
> >   	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
> > @@ -2178,6 +2317,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
> >   	}
> >
> >   	device->fs_devices = root->fs_info->fs_devices;
> > +	device->devnum = bdev->bd_dev;
> >
> >   	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
> >   	lock_chunks(root);
> > @@ -2277,6 +2417,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
> >   		ret = btrfs_commit_transaction(trans, root);
> >   	}
> >
> > +	mutex_lock(&uuid_mutex);
> > +	replace_invalid_device(device);
> > +	mutex_unlock(&uuid_mutex);
> > +
> >   	/* Update ctime/mtime for libblkid */
> >   	update_dev_time(device_path);
> >   	return ret;
> > @@ -2378,11 +2522,16 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
> >   	device->dev_stats_valid = 1;
> >   	set_blocksize(device->bdev, 4096);
> >   	device->fs_devices = fs_info->fs_devices;
> > +	device->devnum = bdev->bd_dev;
> >   	list_add(&device->dev_list, &fs_info->fs_devices->devices);
> >   	fs_info->fs_devices->num_devices++;
> >   	fs_info->fs_devices->open_devices++;
> >   	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
> >
> > +	mutex_lock(&uuid_mutex);
> > +	replace_invalid_device(device);
> > +	mutex_unlock(&uuid_mutex);
> > +
> >   	*device_out = device;
> >   	return ret;
> >
> > diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> > index 4cc00e6..7b57cc6 100644
> > --- a/fs/btrfs/volumes.h
> > +++ b/fs/btrfs/volumes.h
> > @@ -80,6 +80,11 @@ struct btrfs_device {
> >   	seqcount_t data_seqcount;
> >   #endif
> >
> > +	struct rb_node rb_node;
> > +
> > +	/* node key in valid_dev_root */
> > +	dev_t devnum;
> > +
> >   	/* the internal btrfs device id */
> >   	u64 devid;
> >
> > @@ -418,6 +423,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
> >   					const u64 *devid,
> >   					const u8 *uuid);
> >   int btrfs_rm_device(struct btrfs_root *root, char *device_path);
> > +void btrfs_cleanup_valid_dev_root(void);
> >   void btrfs_cleanup_fs_uuids(void);
> >   int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
> >   int btrfs_grow_device(struct btrfs_trans_handle *trans,
> >


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 54bd91e..ee09a56 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2154,6 +2154,7 @@  static void __exit exit_btrfs_fs(void)
 	btrfs_end_io_wq_exit();
 	unregister_filesystem(&btrfs_fs_type);
 	btrfs_exit_sysfs();
+	btrfs_cleanup_valid_dev_root();
 	btrfs_cleanup_fs_uuids();
 	btrfs_exit_compress();
 	btrfs_hash_exit();
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0192051..ba86b1b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -27,6 +27,7 @@ 
 #include <linux/kthread.h>
 #include <linux/raid/pq.h>
 #include <linux/semaphore.h>
+#include <linux/rbtree.h>
 #include <asm/div64.h>
 #include "ctree.h"
 #include "extent_map.h"
@@ -52,6 +53,120 @@  static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
 
 DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
+static struct rb_root valid_dev_root = RB_ROOT;
+
+static struct btrfs_device *insert_valid_device(struct btrfs_device *new_dev)
+{
+	struct rb_node **p;
+	struct rb_node *parent;
+	struct rb_node *new;
+	struct btrfs_device *old_dev;
+
+	WARN_ON(!mutex_is_locked(&uuid_mutex));
+
+	parent = NULL;
+	new = &new_dev->rb_node;
+
+	p = &valid_dev_root.rb_node;
+	while (*p) {
+		parent = *p;
+		old_dev = rb_entry(parent, struct btrfs_device, rb_node);
+
+		if (new_dev->devnum < old_dev->devnum)
+			p = &parent->rb_left;
+		else if (new_dev->devnum > old_dev->devnum)
+			p = &parent->rb_right;
+		else {
+			rb_replace_node(parent, new, &valid_dev_root);
+			RB_CLEAR_NODE(parent);
+
+			goto out;
+		}
+	}
+
+	old_dev = NULL;
+	rb_link_node(new, parent, p);
+	rb_insert_color(new, &valid_dev_root);
+
+out:
+	return old_dev;
+}
+
+static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
+{
+	struct btrfs_device *device;
+	WARN_ON(fs_devices->opened);
+	while (!list_empty(&fs_devices->devices)) {
+		device = list_entry(fs_devices->devices.next,
+				    struct btrfs_device, dev_list);
+		list_del(&device->dev_list);
+		rcu_string_free(device->name);
+		kfree(device);
+	}
+	kfree(fs_devices);
+}
+
+static void remove_empty_fs_if_need(struct btrfs_fs_devices *old_fs)
+{
+	struct btrfs_fs_devices *seed_fs;
+
+	if (!list_empty(&old_fs->devices))
+		return;
+
+	list_del(&old_fs->list);
+
+	/* free the seed clones */
+	seed_fs = old_fs->seed;
+	free_fs_devices(old_fs);
+	while (seed_fs) {
+		old_fs = seed_fs;
+		seed_fs = seed_fs->seed;
+		free_fs_devices(old_fs);
+	}
+
+}
+
+static void replace_invalid_device(struct btrfs_device *new_dev)
+{
+	struct btrfs_device *invalid_dev;
+	struct btrfs_fs_devices *old_fs;
+
+	WARN_ON(!mutex_is_locked(&uuid_mutex));
+
+	invalid_dev = insert_valid_device(new_dev);
+	if (!invalid_dev)
+		return;
+
+	old_fs = invalid_dev->fs_devices;
+	mutex_lock(&old_fs->device_list_mutex);
+	list_del(&invalid_dev->dev_list);
+	rcu_string_free(invalid_dev->name);
+	kfree(invalid_dev);
+	mutex_unlock(&old_fs->device_list_mutex);
+
+	remove_empty_fs_if_need(old_fs);
+}
+
+static void remove_valid_device(struct btrfs_device *old_dev)
+{
+	WARN_ON(!mutex_is_locked(&uuid_mutex));
+
+	if (!RB_EMPTY_NODE(&old_dev->rb_node)) {
+		rb_erase(&old_dev->rb_node, &valid_dev_root);
+		RB_CLEAR_NODE(&old_dev->rb_node);
+	}
+}
+
+void btrfs_cleanup_valid_dev_root(void)
+{
+	struct rb_node *rb_node;
+
+	rb_node = rb_first(&valid_dev_root);
+	while (rb_node) {
+		rb_erase(rb_node, &valid_dev_root);
+		rb_node = rb_first(&valid_dev_root);
+	}
+}
 
 static void lock_chunks(struct btrfs_root *root)
 {
@@ -106,20 +221,6 @@  static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
 	return fs_devs;
 }
 
-static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
-{
-	struct btrfs_device *device;
-	WARN_ON(fs_devices->opened);
-	while (!list_empty(&fs_devices->devices)) {
-		device = list_entry(fs_devices->devices.next,
-				    struct btrfs_device, dev_list);
-		list_del(&device->dev_list);
-		rcu_string_free(device->name);
-		kfree(device);
-	}
-	kfree(fs_devices);
-}
-
 static void btrfs_kobject_uevent(struct block_device *bdev,
 				 enum kobject_action action)
 {
@@ -165,6 +266,8 @@  static struct btrfs_device *__alloc_device(void)
 	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
 	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
 
+	RB_CLEAR_NODE(&dev->rb_node);
+
 	return dev;
 }
 
@@ -461,7 +564,7 @@  static void pending_bios_fn(struct btrfs_work *work)
  * < 0 - error
  */
 static noinline int device_list_add(const char *path,
-			   struct btrfs_super_block *disk_super,
+			   struct btrfs_super_block *disk_super, dev_t devnum,
 			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
 {
 	struct btrfs_device *device;
@@ -509,6 +612,8 @@  static noinline int device_list_add(const char *path,
 
 		ret = 1;
 		device->fs_devices = fs_devices;
+		device->devnum = devnum;
+		replace_invalid_device(device);
 	} else if (!device->name || strcmp(device->name->str, path)) {
 		/*
 		 * When FS is already mounted.
@@ -609,6 +714,7 @@  static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
 
 		list_add(&device->dev_list, &fs_devices->devices);
 		device->fs_devices = fs_devices;
+		device->devnum = orig_dev->devnum;
 		fs_devices->num_devices++;
 	}
 	mutex_unlock(&orig->device_list_mutex);
@@ -619,6 +725,15 @@  error:
 	return ERR_PTR(-ENOMEM);
 }
 
+/*
+ * If @fs_devices is not in global list @fs_uuids,
+ * then it is a cloned btrfs_fs_devices for seeding
+ */
+static int is_cloned_fs_devices(struct btrfs_fs_devices *fs_devices)
+{
+	return list_empty(&fs_devices->list);
+}
+
 void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
 			       struct btrfs_fs_devices *fs_devices, int step)
 {
@@ -665,6 +780,10 @@  again:
 				fs_devices->rw_devices--;
 		}
 		list_del_init(&device->dev_list);
+
+		/* skip cloned fs_devices which act as seed devices*/
+		if (!is_cloned_fs_devices(fs_devices))
+			remove_valid_device(device);
 		fs_devices->num_devices--;
 		rcu_string_free(device->name);
 		kfree(device);
@@ -740,6 +859,11 @@  static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 
 		list_replace_rcu(&device->dev_list, &new_device->dev_list);
 		new_device->fs_devices = device->fs_devices;
+		new_device->devnum = device->devnum;
+
+		/* skip cloned fs_devices which act as seed devices*/
+		if (!is_cloned_fs_devices(device->fs_devices))
+			insert_valid_device(new_device);
 
 		call_rcu(&device->rcu, free_device);
 	}
@@ -952,7 +1076,8 @@  int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 	transid = btrfs_super_generation(disk_super);
 	total_devices = btrfs_super_num_devices(disk_super);
 
-	ret = device_list_add(path, disk_super, devid, fs_devices_ret);
+	ret = device_list_add(path, disk_super, bdev->bd_dev,
+				devid, fs_devices_ret);
 	if (ret > 0) {
 		if (disk_super->label[0]) {
 			if (disk_super->label[BTRFS_LABEL_SIZE - 1])
@@ -1682,6 +1807,7 @@  int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 	 */
 
 	cur_devices = device->fs_devices;
+	remove_valid_device(device);
 	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
 	list_del_rcu(&device->dev_list);
 
@@ -1829,6 +1955,8 @@  void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
 
 	if (srcdev->bdev)
 		fs_devices->open_devices--;
+
+	remove_valid_device(srcdev);
 }
 
 void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
@@ -1883,6 +2011,7 @@  void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 	if (tgtdev->bdev == fs_info->fs_devices->latest_bdev)
 		fs_info->fs_devices->latest_bdev = next_device->bdev;
 	list_del_rcu(&tgtdev->dev_list);
+	remove_valid_device(tgtdev);
 
 	call_rcu(&tgtdev->rcu, free_device);
 
@@ -1975,12 +2104,22 @@  static int btrfs_prepare_sprout(struct btrfs_root *root)
 		return PTR_ERR(old_devices);
 	}
 
+	/*
+	 * Here @old_devices represent the fs_devices that will be linked
+	 * in the fs_uuids, and devices in it should be valid.
+	 * All devices in @fs_devices which will be moved into @seed_devices
+	 * and they just act as clones. So replace those clones which sit
+	 * in @dev_map_root for now with valid devices in @old_devices.
+	 */
+	list_for_each_entry(device, &old_devices->devices, dev_list)
+		insert_valid_device(device);
 	list_add(&old_devices->list, &fs_uuids);
 
 	memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
 	seed_devices->opened = 1;
 	INIT_LIST_HEAD(&seed_devices->devices);
 	INIT_LIST_HEAD(&seed_devices->alloc_list);
+	INIT_LIST_HEAD(&seed_devices->list);
 	mutex_init(&seed_devices->device_list_mutex);
 
 	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
@@ -2178,6 +2317,7 @@  int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	}
 
 	device->fs_devices = root->fs_info->fs_devices;
+	device->devnum = bdev->bd_dev;
 
 	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
 	lock_chunks(root);
@@ -2277,6 +2417,10 @@  int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 		ret = btrfs_commit_transaction(trans, root);
 	}
 
+	mutex_lock(&uuid_mutex);
+	replace_invalid_device(device);
+	mutex_unlock(&uuid_mutex);
+
 	/* Update ctime/mtime for libblkid */
 	update_dev_time(device_path);
 	return ret;
@@ -2378,11 +2522,16 @@  int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
 	device->dev_stats_valid = 1;
 	set_blocksize(device->bdev, 4096);
 	device->fs_devices = fs_info->fs_devices;
+	device->devnum = bdev->bd_dev;
 	list_add(&device->dev_list, &fs_info->fs_devices->devices);
 	fs_info->fs_devices->num_devices++;
 	fs_info->fs_devices->open_devices++;
 	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
+	mutex_lock(&uuid_mutex);
+	replace_invalid_device(device);
+	mutex_unlock(&uuid_mutex);
+
 	*device_out = device;
 	return ret;
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 4cc00e6..7b57cc6 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -80,6 +80,11 @@  struct btrfs_device {
 	seqcount_t data_seqcount;
 #endif
 
+	struct rb_node rb_node;
+
+	/* node key in valid_dev_root */
+	dev_t devnum;
+
 	/* the internal btrfs device id */
 	u64 devid;
 
@@ -418,6 +423,7 @@  struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
 					const u64 *devid,
 					const u8 *uuid);
 int btrfs_rm_device(struct btrfs_root *root, char *device_path);
+void btrfs_cleanup_valid_dev_root(void);
 void btrfs_cleanup_fs_uuids(void);
 int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
 int btrfs_grow_device(struct btrfs_trans_handle *trans,