[v2,1/2] Btrfs: add more valid checks for superblock
diff mbox

Message ID 1464980715-6442-1-git-send-email-bo.li.liu@oracle.com
State New
Headers show

Commit Message

Liu Bo June 3, 2016, 7:05 p.m. UTC
This adds valid checks for super_total_bytes, super_bytes_used and
super_stripesize, super_num_devices.

Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Reported-by: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
v2:
 - Check super_num_devices and super_total_bytes after loading chunk
   tree.
 - Check super_bytes_used against the minimum space usage of a fresh
   mkfs.btrfs.
 - Fix super_stripesize to be sectorsize instead of 4096

 fs/btrfs/disk-io.c | 11 +++++++++++
 fs/btrfs/volumes.c | 24 ++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

Comments

David Sterba June 6, 2016, 8:37 a.m. UTC | #1
On Fri, Jun 03, 2016 at 12:05:14PM -0700, Liu Bo wrote:
> This adds valid checks for super_total_bytes, super_bytes_used and
> super_stripesize, super_num_devices.
> 
> Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
> Reported-by: Quentin Casasnovas <quentin.casasnovas@oracle.com>
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>

Reviewed-by: David Sterba <dsterba@suse.com>

I'll switch the printk(KERN_ to the btrfs_* helpers and do minor tweaks
in the messages, but otherwise this version looks good to me. I don't
think we need the helper for stripe value checks as I proposed.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba Nov. 25, 2016, 4:50 p.m. UTC | #2
On Fri, Jun 03, 2016 at 12:05:14PM -0700, Liu Bo wrote:
> @@ -6648,6 +6648,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
>  	struct btrfs_key found_key;
>  	int ret;
>  	int slot;
> +	u64 total_dev = 0;
>  
>  	root = root->fs_info->chunk_root;
>  
> @@ -6689,6 +6690,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
>  			ret = read_one_dev(root, leaf, dev_item);
>  			if (ret)
>  				goto error;
> +			total_dev++;
>  		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
>  			struct btrfs_chunk *chunk;
>  			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
> @@ -6698,6 +6700,28 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
>  		}
>  		path->slots[0]++;
>  	}
> +
> +	/*
> +	 * After loading chunk tree, we've got all device information,
> +	 * do another round of validation check.
> +	 */
> +	if (total_dev != root->fs_info->fs_devices->total_devices) {
> +		btrfs_err(root->fs_info,
> +	   "super_num_devices(%llu) mismatch with num_devices(%llu) found here",
> +			  btrfs_super_num_devices(root->fs_info->super_copy),
> +			  total_dev);
> +		ret = -EINVAL;

Turns out this check is too strict in combination with an intermediate
state and a bug in device deletion.

We've had several reports where a filesystem becomes unmountable due to
the above check, where the wrong value is just stored in the superblock
and is fixable by simply setting it to the right value. The right value
is number of dev items found in the dev tree, which is exactly the
total_dev that we read here.

The intermediate state can be caused by removing the device item in
btrfs_rm_device (see comment before btrfs_rm_dev_item call). This
apparently happens, when the device deletion is not finished, ie. the
superblock is not overwritten with a new copy.

So: do you agree to make it just a warning, and fixup the superblock
num_devices here? A userspace fix is also possible, but when this
happens and the filesystem is not mountable, a fix from outside of the
filesystem might be hard.

> +		goto error;
> +	}
> +	if (btrfs_super_total_bytes(root->fs_info->super_copy) <
> +	    root->fs_info->fs_devices->total_rw_bytes) {
> +		btrfs_err(root->fs_info,
> +	"super_total_bytes(%llu) mismatch with fs_devices total_rw_bytes(%llu)",
> +			  btrfs_super_total_bytes(root->fs_info->super_copy),
> +			  root->fs_info->fs_devices->total_rw_bytes);
> +		ret = -EINVAL;
> +		goto error;
> +	}
>  	ret = 0;
>  error:
>  	unlock_chunks(root);
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liu Bo Nov. 28, 2016, 8:07 p.m. UTC | #3
On Fri, Nov 25, 2016 at 05:50:19PM +0100, David Sterba wrote:
> On Fri, Jun 03, 2016 at 12:05:14PM -0700, Liu Bo wrote:
> > @@ -6648,6 +6648,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
> >  	struct btrfs_key found_key;
> >  	int ret;
> >  	int slot;
> > +	u64 total_dev = 0;
> >  
> >  	root = root->fs_info->chunk_root;
> >  
> > @@ -6689,6 +6690,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
> >  			ret = read_one_dev(root, leaf, dev_item);
> >  			if (ret)
> >  				goto error;
> > +			total_dev++;
> >  		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
> >  			struct btrfs_chunk *chunk;
> >  			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
> > @@ -6698,6 +6700,28 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
> >  		}
> >  		path->slots[0]++;
> >  	}
> > +
> > +	/*
> > +	 * After loading chunk tree, we've got all device information,
> > +	 * do another round of validation check.
> > +	 */
> > +	if (total_dev != root->fs_info->fs_devices->total_devices) {
> > +		btrfs_err(root->fs_info,
> > +	   "super_num_devices(%llu) mismatch with num_devices(%llu) found here",
> > +			  btrfs_super_num_devices(root->fs_info->super_copy),
> > +			  total_dev);
> > +		ret = -EINVAL;
> 
> Turns out this check is too strict in combination with an intermediate
> state and a bug in device deletion.
> 
> We've had several reports where a filesystem becomes unmountable due to
> the above check, where the wrong value is just stored in the superblock
> and is fixable by simply setting it to the right value. The right value
> is number of dev items found in the dev tree, which is exactly the
> total_dev that we read here.
> 
> The intermediate state can be caused by removing the device item in
> btrfs_rm_device (see comment before btrfs_rm_dev_item call). This
> apparently happens, when the device deletion is not finished, ie. the
> superblock is not overwritten with a new copy.

I see.

Looks like currently it does commit_transaction in btrfs_rm_devi_item() while it
doesn't in btrfs_add_device, so it turns out that the logics in adding a device
and removing a device are a little bit different,

btrfs_rm_device                              btrfs_init_new_devices
  ...                                          ...
  ->btrfs_rm_dev_item                          ->btrfs_start_transaction()
    ->btrfs_start_transaction                  ->mutex_lock(&device_list_mutex)
    ->#rm dev item in chunk tree               ->list_add
    ->btrfs_commit_transaction                 ->btrfs_set_super_num_device
  ->mutex_lock(&device_list_mutex)             ->mutex_unlock(&device_list_mutex)
  ->list_del()                                 ->btrfs_add_device
  ->btrfs_set_super_num_device                 ->btrfs_commit_transaction()
  ->mutext_unlock(&device_list_mutex)


Not sure why we made it different around commit_transaction, but seems that we
can avoid the situation described above by changing how we play with transaction
in rm_device.

> 
> So: do you agree to make it just a warning, and fixup the superblock
> num_devices here? A userspace fix is also possible, but when this
> happens and the filesystem is not mountable, a fix from outside of the
> filesystem might be hard.

I'm OK with setting up a warning.

Thanks,

-liubo

> 
> > +		goto error;
> > +	}
> > +	if (btrfs_super_total_bytes(root->fs_info->super_copy) <
> > +	    root->fs_info->fs_devices->total_rw_bytes) {
> > +		btrfs_err(root->fs_info,
> > +	"super_total_bytes(%llu) mismatch with fs_devices total_rw_bytes(%llu)",
> > +			  btrfs_super_total_bytes(root->fs_info->super_copy),
> > +			  root->fs_info->fs_devices->total_rw_bytes);
> > +		ret = -EINVAL;
> > +		goto error;
> > +	}
> >  	ret = 0;
> >  error:
> >  	unlock_chunks(root);
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6628fca..ea78d77 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4130,6 +4130,17 @@  static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 	 * Hint to catch really bogus numbers, bitflips or so, more exact checks are
 	 * done later
 	 */
+	if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
+		printk(KERN_ERR "BTRFS: bytes_used is too small %llu\n",
+		       btrfs_super_bytes_used(sb));
+		ret = -EINVAL;
+	}
+	if (!is_power_of_2(btrfs_super_stripesize(sb)) ||
+	    btrfs_super_stripesize(sb) != sectorsize) {
+		printk(KERN_ERR "BTRFS: invalid stripesize %u\n",
+		       btrfs_super_stripesize(sb));
+		ret = -EINVAL;
+	}
 	if (btrfs_super_num_devices(sb) > (1UL << 31))
 		printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
 				btrfs_super_num_devices(sb));
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bdc6256..d403ab6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6648,6 +6648,7 @@  int btrfs_read_chunk_tree(struct btrfs_root *root)
 	struct btrfs_key found_key;
 	int ret;
 	int slot;
+	u64 total_dev = 0;
 
 	root = root->fs_info->chunk_root;
 
@@ -6689,6 +6690,7 @@  int btrfs_read_chunk_tree(struct btrfs_root *root)
 			ret = read_one_dev(root, leaf, dev_item);
 			if (ret)
 				goto error;
+			total_dev++;
 		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
 			struct btrfs_chunk *chunk;
 			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -6698,6 +6700,28 @@  int btrfs_read_chunk_tree(struct btrfs_root *root)
 		}
 		path->slots[0]++;
 	}
+
+	/*
+	 * After loading chunk tree, we've got all device information,
+	 * do another round of validation check.
+	 */
+	if (total_dev != root->fs_info->fs_devices->total_devices) {
+		btrfs_err(root->fs_info,
+	   "super_num_devices(%llu) mismatch with num_devices(%llu) found here",
+			  btrfs_super_num_devices(root->fs_info->super_copy),
+			  total_dev);
+		ret = -EINVAL;
+		goto error;
+	}
+	if (btrfs_super_total_bytes(root->fs_info->super_copy) <
+	    root->fs_info->fs_devices->total_rw_bytes) {
+		btrfs_err(root->fs_info,
+	"super_total_bytes(%llu) mismatch with fs_devices total_rw_bytes(%llu)",
+			  btrfs_super_total_bytes(root->fs_info->super_copy),
+			  root->fs_info->fs_devices->total_rw_bytes);
+		ret = -EINVAL;
+		goto error;
+	}
 	ret = 0;
 error:
 	unlock_chunks(root);