diff mbox series

[v3,4/4] btrfs: add helper btrfs_num_devices() to deduce num_devices

Message ID 20180802100934.12468-1-anand.jain@oracle.com (mailing list archive)
State New, archived
Headers show
Series None | expand

Commit Message

Anand Jain Aug. 2, 2018, 10:09 a.m. UTC
When the replace is running the fs_devices::num_devices also includes
the replace device, however in some operations like device delete and
balance it needs the actual num_devices without the repalce devices, so
now the function btrfs_num_devices() just provides that.

And here is a scenario how balance and repalce items could co-exist.
Consider balance is started and paused, now start the replace
followed by a power-recycle of the system. During following mount,
the open_ctree() first restarts the balance so it must check for the
replace device otherwise our num_devices calculation will be wrong.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
v2->v3: update changelog with not so obvious balance and repalce
co-existance secnario
v1->v2: add comments

 fs/btrfs/volumes.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

Comments

Nikolay Borisov Aug. 2, 2018, 10:11 a.m. UTC | #1
On  2.08.2018 13:09, Anand Jain wrote:
> When the replace is running the fs_devices::num_devices also includes
> the replace device, however in some operations like device delete and
> balance it needs the actual num_devices without the repalce devices, so
> now the function btrfs_num_devices() just provides that.
> 
> And here is a scenario how balance and repalce items could co-exist.
> Consider balance is started and paused, now start the replace
> followed by a power-recycle of the system. During following mount,
> the open_ctree() first restarts the balance so it must check for the
> replace device otherwise our num_devices calculation will be wrong.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
> v2->v3: update changelog with not so obvious balance and repalce
> co-existance secnario
> v1->v2: add comments
> 
>  fs/btrfs/volumes.c | 32 ++++++++++++++++++--------------
>  1 file changed, 18 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index fe74fefc75f7..8844904f9009 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -1854,6 +1854,21 @@ void btrfs_assign_next_active_device(struct btrfs_device *device,
>  		fs_info->fs_devices->latest_bdev = next_device->bdev;
>  }
>  
> +/* Returns btrfs_fs_devices::num_devices excluding replace device if any */
> +static inline u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
> +{
> +	u64 num_devices = fs_info->fs_devices->num_devices;
> +
> +	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
> +	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
> +		BUG_ON(num_devices < 1);
> +		num_devices--;
> +	}
> +	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
> +
> +	return num_devices;
> +}
> +
>  int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
>  		u64 devid)
>  {
> @@ -1865,13 +1880,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
>  
>  	mutex_lock(&uuid_mutex);
>  
> -	num_devices = fs_devices->num_devices;
> -	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
> -	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
> -		BUG_ON(num_devices < 1);
> -		num_devices--;
> -	}
> -	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
> +	num_devices = btrfs_num_devices(fs_info);

How about lifting the BUG_ON from btrfs_num_devices into a check in this
function, so if num_devices < 1 then we just exit with -EINVAL or some
such. We should be aiming at eliminating BUG_ONs.

>  
>  	ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
>  	if (ret)
> @@ -3721,13 +3730,8 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
>  		}
>  	}
>  
> -	num_devices = fs_info->fs_devices->num_devices;
> -	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
> -	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
> -		BUG_ON(num_devices < 1);
> -		num_devices--;
> -	}
> -	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
> +	num_devices = btrfs_num_devices(fs_info);
> +
>  	allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP;
>  	if (num_devices > 1)
>  		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba Aug. 2, 2018, 12:21 p.m. UTC | #2
On Thu, Aug 02, 2018 at 01:11:40PM +0300, Nikolay Borisov wrote:
> 
> 
> On  2.08.2018 13:09, Anand Jain wrote:
> > When the replace is running the fs_devices::num_devices also includes
> > the replace device, however in some operations like device delete and
> > balance it needs the actual num_devices without the repalce devices, so
> > now the function btrfs_num_devices() just provides that.
> > 
> > And here is a scenario how balance and repalce items could co-exist.
> > Consider balance is started and paused, now start the replace
> > followed by a power-recycle of the system. During following mount,
> > the open_ctree() first restarts the balance so it must check for the
> > replace device otherwise our num_devices calculation will be wrong.
> > 
> > Signed-off-by: Anand Jain <anand.jain@oracle.com>
> > ---
> > v2->v3: update changelog with not so obvious balance and repalce
> > co-existance secnario
> > v1->v2: add comments
> > 
> >  fs/btrfs/volumes.c | 32 ++++++++++++++++++--------------
> >  1 file changed, 18 insertions(+), 14 deletions(-)
> > 
> > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> > index fe74fefc75f7..8844904f9009 100644
> > --- a/fs/btrfs/volumes.c
> > +++ b/fs/btrfs/volumes.c
> > @@ -1854,6 +1854,21 @@ void btrfs_assign_next_active_device(struct btrfs_device *device,
> >  		fs_info->fs_devices->latest_bdev = next_device->bdev;
> >  }
> >  
> > +/* Returns btrfs_fs_devices::num_devices excluding replace device if any */
> > +static inline u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)

This does not need to be static inline, it's not in a header.

> > +{
> > +	u64 num_devices = fs_info->fs_devices->num_devices;
> > +
> > +	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
> > +	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
> > +		BUG_ON(num_devices < 1);
> > +		num_devices--;
> > +	}
> > +	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
> > +
> > +	return num_devices;
> > +}
> > +
> >  int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
> >  		u64 devid)
> >  {
> > @@ -1865,13 +1880,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
> >  
> >  	mutex_lock(&uuid_mutex);
> >  
> > -	num_devices = fs_devices->num_devices;
> > -	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
> > -	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
> > -		BUG_ON(num_devices < 1);
> > -		num_devices--;
> > -	}
> > -	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
> > +	num_devices = btrfs_num_devices(fs_info);
> 
> How about lifting the BUG_ON from btrfs_num_devices into a check in this
> function, so if num_devices < 1 then we just exit with -EINVAL or some
> such. We should be aiming at eliminating BUG_ONs.

Right, in both cases it's possible to return with an error instead of
the BUG_ON.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Anand Jain Aug. 2, 2018, 1:07 p.m. UTC | #3
On 08/02/2018 08:21 PM, David Sterba wrote:
> On Thu, Aug 02, 2018 at 01:11:40PM +0300, Nikolay Borisov wrote:
>>
>>
>> On  2.08.2018 13:09, Anand Jain wrote:
>>> When the replace is running the fs_devices::num_devices also includes
>>> the replace device, however in some operations like device delete and
>>> balance it needs the actual num_devices without the repalce devices, so
>>> now the function btrfs_num_devices() just provides that.
>>>
>>> And here is a scenario how balance and repalce items could co-exist.
>>> Consider balance is started and paused, now start the replace
>>> followed by a power-recycle of the system. During following mount,
>>> the open_ctree() first restarts the balance so it must check for the
>>> replace device otherwise our num_devices calculation will be wrong.
>>>
>>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
>>> ---
>>> v2->v3: update changelog with not so obvious balance and repalce
>>> co-existance secnario
>>> v1->v2: add comments
>>>
>>>   fs/btrfs/volumes.c | 32 ++++++++++++++++++--------------
>>>   1 file changed, 18 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>>> index fe74fefc75f7..8844904f9009 100644
>>> --- a/fs/btrfs/volumes.c
>>> +++ b/fs/btrfs/volumes.c
>>> @@ -1854,6 +1854,21 @@ void btrfs_assign_next_active_device(struct btrfs_device *device,
>>>   		fs_info->fs_devices->latest_bdev = next_device->bdev;
>>>   }
>>>   
>>> +/* Returns btrfs_fs_devices::num_devices excluding replace device if any */
>>> +static inline u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
> 
> This does not need to be static inline, it's not in a header.

ok will fix.

>>> +{
>>> +	u64 num_devices = fs_info->fs_devices->num_devices;
>>> +
>>> +	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
>>> +	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
>>> +		BUG_ON(num_devices < 1);
>>> +		num_devices--;
>>> +	}
>>> +	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
>>> +
>>> +	return num_devices;
>>> +}
>>> +
>>>   int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
>>>   		u64 devid)
>>>   {
>>> @@ -1865,13 +1880,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
>>>   
>>>   	mutex_lock(&uuid_mutex);
>>>   
>>> -	num_devices = fs_devices->num_devices;
>>> -	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
>>> -	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
>>> -		BUG_ON(num_devices < 1);
>>> -		num_devices--;
>>> -	}
>>> -	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
>>> +	num_devices = btrfs_num_devices(fs_info);
>>
>> How about lifting the BUG_ON from btrfs_num_devices into a check in this
>> function, so if num_devices < 1 then we just exit with -EINVAL or some
>> such. We should be aiming at eliminating BUG_ONs.
> 
> Right, in both cases it's possible to return with an error instead of
> the BUG_ON.

Actually we should just remove it as its a logical bug if num_devices < 
1, so long we didn't hit this bug which means its stable OR keep BUG_ON 
it until we add RAID-N. ?

Thanks, Anand


> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba Aug. 7, 2018, 3:02 p.m. UTC | #4
On Thu, Aug 02, 2018 at 09:07:00PM +0800, Anand Jain wrote:
> >>> -	num_devices = fs_devices->num_devices;
> >>> -	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
> >>> -	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
> >>> -		BUG_ON(num_devices < 1);
> >>> -		num_devices--;
> >>> -	}
> >>> -	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
> >>> +	num_devices = btrfs_num_devices(fs_info);
> >>
> >> How about lifting the BUG_ON from btrfs_num_devices into a check in this
> >> function, so if num_devices < 1 then we just exit with -EINVAL or some
> >> such. We should be aiming at eliminating BUG_ONs.
> > 
> > Right, in both cases it's possible to return with an error instead of
> > the BUG_ON.
> 
> Actually we should just remove it as its a logical bug if num_devices < 
> 1, so long we didn't hit this bug which means its stable OR keep BUG_ON 
> it until we add RAID-N. ?

I think the BUG_ON is redundant, all the sanity checks at mount time or
ioctl remove/replace make sure that there are enough devices to perform
the action. Still, it can be an assert, such things do not hurt and may
be catch other errors someday.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Anand Jain Aug. 7, 2018, 10:43 p.m. UTC | #5
On 08/07/2018 11:02 PM, David Sterba wrote:
> On Thu, Aug 02, 2018 at 09:07:00PM +0800, Anand Jain wrote:
>>>>> -	num_devices = fs_devices->num_devices;
>>>>> -	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
>>>>> -	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
>>>>> -		BUG_ON(num_devices < 1);
>>>>> -		num_devices--;
>>>>> -	}
>>>>> -	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
>>>>> +	num_devices = btrfs_num_devices(fs_info);
>>>>
>>>> How about lifting the BUG_ON from btrfs_num_devices into a check in this
>>>> function, so if num_devices < 1 then we just exit with -EINVAL or some
>>>> such. We should be aiming at eliminating BUG_ONs.
>>>
>>> Right, in both cases it's possible to return with an error instead of
>>> the BUG_ON.
>>
>> Actually we should just remove it as its a logical bug if num_devices <
>> 1, so long we didn't hit this bug which means its stable OR keep BUG_ON
>> it until we add RAID-N. ?
> 
> I think the BUG_ON is redundant, all the sanity checks at mount time or
> ioctl remove/replace make sure that there are enough devices to perform
> the action. Still, it can be an assert, such things do not hurt and may
> be catch other errors someday.

  Assert makes sense to me. Patch [1] shall be dropped
  [1]
  [PATCH] btrfs: handle the BUG_ON in btrfs_num_devices()

Thanks, Anand


> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox series

Patch

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fe74fefc75f7..8844904f9009 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1854,6 +1854,21 @@  void btrfs_assign_next_active_device(struct btrfs_device *device,
 		fs_info->fs_devices->latest_bdev = next_device->bdev;
 }
 
+/* Returns btrfs_fs_devices::num_devices excluding replace device if any */
+static inline u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
+{
+	u64 num_devices = fs_info->fs_devices->num_devices;
+
+	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
+	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
+		BUG_ON(num_devices < 1);
+		num_devices--;
+	}
+	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
+
+	return num_devices;
+}
+
 int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
 		u64 devid)
 {
@@ -1865,13 +1880,7 @@  int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
 
 	mutex_lock(&uuid_mutex);
 
-	num_devices = fs_devices->num_devices;
-	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
-	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
-		BUG_ON(num_devices < 1);
-		num_devices--;
-	}
-	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
+	num_devices = btrfs_num_devices(fs_info);
 
 	ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
 	if (ret)
@@ -3721,13 +3730,8 @@  int btrfs_balance(struct btrfs_fs_info *fs_info,
 		}
 	}
 
-	num_devices = fs_info->fs_devices->num_devices;
-	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
-	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
-		BUG_ON(num_devices < 1);
-		num_devices--;
-	}
-	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
+	num_devices = btrfs_num_devices(fs_info);
+
 	allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP;
 	if (num_devices > 1)
 		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);