diff mbox series

[1/2] btrfs: scrub: maintain the unlock order in scrub thread

Message ID 1543223228-28232-2-git-send-email-anand.jain@oracle.com (mailing list archive)
State New, archived
Headers show
Series btrfs: scrub: fix scrub_lock | expand

Commit Message

Anand Jain Nov. 26, 2018, 9:07 a.m. UTC
The fs_info::device_list_mutex and fs_info::scrub_lock creates a
nested locks in btrfs_scrub_dev(). During the lock acquire the
hierarchy is fs_info::device_list_mutex and then fs_info::scrub_lock,
so following the same reverse order during unlock, that is
fs_info::scrub_lock and then fs_info::device_list_mutex.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/scrub.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

Comments

Nikolay Borisov Nov. 26, 2018, 9:47 a.m. UTC | #1
On 26.11.18 г. 11:07 ч., Anand Jain wrote:
> The fs_info::device_list_mutex and fs_info::scrub_lock creates a
> nested locks in btrfs_scrub_dev(). During the lock acquire the
> hierarchy is fs_info::device_list_mutex and then fs_info::scrub_lock,
> so following the same reverse order during unlock, that is
> fs_info::scrub_lock and then fs_info::device_list_mutex.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
>  fs/btrfs/scrub.c | 16 +++++++---------
>  1 file changed, 7 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
> index 902819d3cf41..b1c2d1cdbd4b 100644
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -3865,7 +3865,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
>  	}
>  	sctx->readonly = readonly;
>  	dev->scrub_ctx = sctx;
> -	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
>  
>  	/*
>  	 * checking @scrub_pause_req here, we can avoid
> @@ -3875,15 +3874,14 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
>  	atomic_inc(&fs_info->scrubs_running);
>  	mutex_unlock(&fs_info->scrub_lock);
>  
> -	if (!is_dev_replace) {
> -		/*
> -		 * by holding device list mutex, we can
> -		 * kick off writing super in log tree sync.
> -		 */
> -		mutex_lock(&fs_info->fs_devices->device_list_mutex);
> +	/*
> +	 * by holding device list mutex, we can kick off writing super in log
> +	 * tree sync.
> +	 */
> +	if (!is_dev_replace)
>  		ret = scrub_supers(sctx, dev);
> -		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> -	}
> +
> +	mutex_unlock(&fs_info->fs_devices->device_list_mutex);

Have you considered whether this change will have any negative impact
due to the fact that __scrtub_blocked_if_needed can go to sleep for
arbitrary time with device_list_mutex held now ?

>  
>  	if (!ret)
>  		ret = scrub_enumerate_chunks(sctx, dev, start, end);
>
Anand Jain Nov. 27, 2018, 1:37 p.m. UTC | #2
On 11/26/2018 05:47 PM, Nikolay Borisov wrote:
> 
> 
> On 26.11.18 г. 11:07 ч., Anand Jain wrote:
>> The fs_info::device_list_mutex and fs_info::scrub_lock creates a
>> nested locks in btrfs_scrub_dev(). During the lock acquire the
>> hierarchy is fs_info::device_list_mutex and then fs_info::scrub_lock,
>> so following the same reverse order during unlock, that is
>> fs_info::scrub_lock and then fs_info::device_list_mutex.
>>
>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
>> ---
>>   fs/btrfs/scrub.c | 16 +++++++---------
>>   1 file changed, 7 insertions(+), 9 deletions(-)
>>
>> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
>> index 902819d3cf41..b1c2d1cdbd4b 100644
>> --- a/fs/btrfs/scrub.c
>> +++ b/fs/btrfs/scrub.c
>> @@ -3865,7 +3865,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
>>   	}
>>   	sctx->readonly = readonly;
>>   	dev->scrub_ctx = sctx;
>> -	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
>>   
>>   	/*
>>   	 * checking @scrub_pause_req here, we can avoid
>> @@ -3875,15 +3874,14 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
>>   	atomic_inc(&fs_info->scrubs_running);
>>   	mutex_unlock(&fs_info->scrub_lock);
>>   
>> -	if (!is_dev_replace) {
>> -		/*
>> -		 * by holding device list mutex, we can
>> -		 * kick off writing super in log tree sync.
>> -		 */
>> -		mutex_lock(&fs_info->fs_devices->device_list_mutex);
>> +	/*
>> +	 * by holding device list mutex, we can kick off writing super in log
>> +	 * tree sync.
>> +	 */
>> +	if (!is_dev_replace)
>>   		ret = scrub_supers(sctx, dev);
>> -		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
>> -	}
>> +
>> +	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> 
> Have you considered whether this change will have any negative impact
> due to the fact that __scrtub_blocked_if_needed can go to sleep for
> arbitrary time with device_list_mutex held now ?

  You are right. I missed that point. The device_list_mutex must not be
  blocked. In fact here we don't need the nested device_list_mutex and
  scrub_lock at all. I have comeup with a new fix [1] below separating
  them.

[1]
---------------------------------
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 902819d3cf41..db895ad23eda 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3830,42 +3830,37 @@ int btrfs_scrub_dev(struct btrfs_fs_info 
*fs_info, u64 devid, u64 start,
                 return -EROFS;
         }

-       mutex_lock(&fs_info->scrub_lock);
         if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
             test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
-               mutex_unlock(&fs_info->scrub_lock);
                 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
                 return -EIO;
         }
+       mutex_unlock(&fs_info->fs_devices->device_list_mutex);

         btrfs_dev_replace_read_lock(&fs_info->dev_replace);
         if (dev->scrub_ctx ||
             (!is_dev_replace &&
              btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
                 btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
-               mutex_unlock(&fs_info->scrub_lock);
-               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
                 return -EINPROGRESS;
         }
         btrfs_dev_replace_read_unlock(&fs_info->dev_replace);

+       mutex_lock(&fs_info->scrub_lock);
         ret = scrub_workers_get(fs_info, is_dev_replace);
         if (ret) {
                 mutex_unlock(&fs_info->scrub_lock);
-               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
                 return ret;
         }

         sctx = scrub_setup_ctx(dev, is_dev_replace);
         if (IS_ERR(sctx)) {
                 mutex_unlock(&fs_info->scrub_lock);
-               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
                 scrub_workers_put(fs_info);
                 return PTR_ERR(sctx);
         }
         sctx->readonly = readonly;
         dev->scrub_ctx = sctx;
-       mutex_unlock(&fs_info->fs_devices->device_list_mutex);

         /*
          * checking @scrub_pause_req here, we can avoid
------------------------------------------------

Will send v2.

Thanks, Anand



> 
>>   
>>   	if (!ret)
>>   		ret = scrub_enumerate_chunks(sctx, dev, start, end);
>>
diff mbox series

Patch

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 902819d3cf41..b1c2d1cdbd4b 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3865,7 +3865,6 @@  int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	}
 	sctx->readonly = readonly;
 	dev->scrub_ctx = sctx;
-	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 
 	/*
 	 * checking @scrub_pause_req here, we can avoid
@@ -3875,15 +3874,14 @@  int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	atomic_inc(&fs_info->scrubs_running);
 	mutex_unlock(&fs_info->scrub_lock);
 
-	if (!is_dev_replace) {
-		/*
-		 * by holding device list mutex, we can
-		 * kick off writing super in log tree sync.
-		 */
-		mutex_lock(&fs_info->fs_devices->device_list_mutex);
+	/*
+	 * by holding device list mutex, we can kick off writing super in log
+	 * tree sync.
+	 */
+	if (!is_dev_replace)
 		ret = scrub_supers(sctx, dev);
-		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-	}
+
+	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 
 	if (!ret)
 		ret = scrub_enumerate_chunks(sctx, dev, start, end);